From 51ebd3181572af8d5076808dab2682d800f6da5d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 22 Oct 2012 03:42:09 +0000 Subject: ipv6: add support of equal cost multipath (ECMP) Each nexthop is added like a single route in the routing table. All routes that have the same metric/weight and destination but not the same gateway are considered ECMP routes. They are linked together, through a list called rt6i_siblings. ECMP routes can be added in one shot, with RTA_MULTIPATH attribute or one after the other (in both cases, the flag NLM_F_EXCL should not be set). The patch is based on previous work from Luc Saillard. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 57 ++++++++++++++++++++++ net/ipv6/route.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 190 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 24995a9..710cafd 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -672,6 +672,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, iter->rt6i_idev == rt->rt6i_idev && ipv6_addr_equal(&iter->rt6i_gateway, &rt->rt6i_gateway)) { + if (rt->rt6i_nsiblings) + rt->rt6i_nsiblings = 0; if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->rt6i_flags & RTF_EXPIRES)) @@ -680,6 +682,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt6_set_expires(iter, rt->dst.expires); return -EEXIST; } + /* If we have the same destination and the same metric, + * but not the same gateway, then the route we try to + * add is sibling to this route, increment our counter + * of siblings, and later we will add our route to the + * list. + * Only static routes (which don't have flag + * RTF_EXPIRES) are used for ECMPv6. + * + * To avoid long list, we only had siblings if the + * route have a gateway. 
+ */ + if (rt->rt6i_flags & RTF_GATEWAY && + !(rt->rt6i_flags & RTF_EXPIRES) && + !(iter->rt6i_flags & RTF_EXPIRES)) + rt->rt6i_nsiblings++; } if (iter->rt6i_metric > rt->rt6i_metric) @@ -692,6 +709,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (ins == &fn->leaf) fn->rr_ptr = NULL; + /* Link this route to others same route. */ + if (rt->rt6i_nsiblings) { + unsigned int rt6i_nsiblings; + struct rt6_info *sibling, *temp_sibling; + + /* Find the first route that have the same metric */ + sibling = fn->leaf; + while (sibling) { + if (sibling->rt6i_metric == rt->rt6i_metric) { + list_add_tail(&rt->rt6i_siblings, + &sibling->rt6i_siblings); + break; + } + sibling = sibling->dst.rt6_next; + } + /* For each sibling in the list, increment the counter of + * siblings. BUG() if counters does not match, list of siblings + * is broken! + */ + rt6i_nsiblings = 0; + list_for_each_entry_safe(sibling, temp_sibling, + &rt->rt6i_siblings, rt6i_siblings) { + sibling->rt6i_nsiblings++; + BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); + rt6i_nsiblings++; + } + BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + } + /* * insert node */ @@ -1193,6 +1239,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (fn->rr_ptr == rt) fn->rr_ptr = NULL; + /* Remove this entry from other siblings */ + if (rt->rt6i_nsiblings) { + struct rt6_info *sibling, *next_sibling; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->rt6i_siblings, rt6i_siblings) + sibling->rt6i_nsiblings--; + rt->rt6i_nsiblings = 0; + list_del_init(&rt->rt6i_siblings); + } + /* Adjust walkers */ read_lock(&fib6_walker_lock); FOR_WALKERS(w) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7c7e963..126da56 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -57,6 +57,7 @@ #include #include #include +#include #include @@ -289,6 +290,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); rt->rt6i_genid = rt_genid(net); + INIT_LIST_HEAD(&rt->rt6i_siblings); + rt->rt6i_nsiblings = 0; } return rt; } @@ -385,6 +388,69 @@ static bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +/* Multipath route selection: + * Hash based function using packet header and flowlabel. + * Adapted from fib_info_hashfn() + */ +static int rt6_info_hash_nhsfn(unsigned int candidate_count, + const struct flowi6 *fl6) +{ + unsigned int val = fl6->flowi6_proto; + + val ^= fl6->daddr.s6_addr32[0]; + val ^= fl6->daddr.s6_addr32[1]; + val ^= fl6->daddr.s6_addr32[2]; + val ^= fl6->daddr.s6_addr32[3]; + + val ^= fl6->saddr.s6_addr32[0]; + val ^= fl6->saddr.s6_addr32[1]; + val ^= fl6->saddr.s6_addr32[2]; + val ^= fl6->saddr.s6_addr32[3]; + + /* Work only if this not encapsulated */ + switch (fl6->flowi6_proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_SCTP: + val ^= fl6->fl6_sport; + val ^= fl6->fl6_dport; + break; + + case IPPROTO_ICMPV6: + val ^= fl6->fl6_icmp_type; + val ^= fl6->fl6_icmp_code; + break; + } + /* RFC6438 recommands to use flowlabel */ + val ^= fl6->flowlabel; + + /* Perhaps, we need to tune, this function? */ + val = val ^ (val >> 7) ^ (val >> 12); + return val % candidate_count; +} + +static struct rt6_info *rt6_multipath_select(struct rt6_info *match, + struct flowi6 *fl6) +{ + struct rt6_info *sibling, *next_sibling; + int route_choosen; + + route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6); + /* Don't change the route, if route_choosen == 0 + * (siblings does not include ourself) + */ + if (route_choosen) + list_for_each_entry_safe(sibling, next_sibling, + &match->rt6i_siblings, rt6i_siblings) { + route_choosen--; + if (route_choosen == 0) { + match = sibling; + break; + } + } + return match; +} + /* * Route lookup. Any table->tb6_lock is implied. 
*/ @@ -702,6 +768,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, restart: rt = fn->leaf; rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); + if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); @@ -863,7 +931,8 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - + if (rt->rt6i_nsiblings && oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -2249,6 +2318,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2326,11 +2396,65 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_TABLE]) cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); + if (tb[RTA_MULTIPATH]) { + cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); + cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + } + err = 0; errout: return err; } +static int ip6_route_multipath(struct fib6_config *cfg, int add) +{ + struct fib6_config r_cfg; + struct rtnexthop *rtnh; + int remaining; + int attrlen; + int err = 0, last_err = 0; + +beginning: + rtnh = (struct rtnexthop *)cfg->fc_mp; + remaining = cfg->fc_mp_len; + + /* Parse a Multipath Entry */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_flags |= RTF_GATEWAY; + } + } + err = add ? 
ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); + if (err) { + last_err = err; + /* If we are trying to remove a route, do not stop the + * loop when ip6_route_del() fails (because next hop is + * already gone), we should try to remove all next hops. + */ + if (add) { + /* If add fails, we should try to delete all + * next hops that have been already added. + */ + add = 0; + goto beginning; + } + } + rtnh = rtnh_next(rtnh, &remaining); + } + + return last_err; +} + static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct fib6_config cfg; @@ -2340,7 +2464,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_del(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 0); + else + return ip6_route_del(&cfg); } static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -2352,7 +2479,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_add(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 1); + else + return ip6_route_add(&cfg); } static inline size_t rt6_nlmsg_size(void) -- cgit v1.1 From f3f121359caa069cefbc48008e94bcd862ca21e2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 22 Oct 2012 21:41:48 +0000 Subject: ipv6: tcp: clean up tcp_v6_early_demux() icsk variable Remove an icsk variable, which by convention should refer to an inet_connection_sock rather than an inet_sock. In the process, make the tcp_v6_early_demux() code and formatting a bit more like tcp_v4_early_demux(), to ease comparisons and maintenance. Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/tcp_ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 26175bf..bb6782e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1741,11 +1741,11 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; - struct inet_sock *icsk = inet_sk(sk); + if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == skb->skb_iif) + inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } -- cgit v1.1 From b3ce5ae1fb6ba45c70e7c4d144182d38f0b0aef7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 22 Oct 2012 23:35:06 +0000 Subject: ipv6: fix sparse warnings in rt6_info_hash_nhsfn() Added by commit 51ebd3181572 which adds the support of ECMP for IPv6. Spotted-by: Fengguang Wu Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 126da56..c42650c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -397,32 +397,32 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, { unsigned int val = fl6->flowi6_proto; - val ^= fl6->daddr.s6_addr32[0]; - val ^= fl6->daddr.s6_addr32[1]; - val ^= fl6->daddr.s6_addr32[2]; - val ^= fl6->daddr.s6_addr32[3]; + val ^= (__force u32)fl6->daddr.s6_addr32[0]; + val ^= (__force u32)fl6->daddr.s6_addr32[1]; + val ^= (__force u32)fl6->daddr.s6_addr32[2]; + val ^= (__force u32)fl6->daddr.s6_addr32[3]; - val ^= fl6->saddr.s6_addr32[0]; - val ^= fl6->saddr.s6_addr32[1]; - val ^= fl6->saddr.s6_addr32[2]; - val ^= fl6->saddr.s6_addr32[3]; + val ^= (__force u32)fl6->saddr.s6_addr32[0]; + val ^= (__force u32)fl6->saddr.s6_addr32[1]; + val ^= (__force u32)fl6->saddr.s6_addr32[2]; + val ^= (__force u32)fl6->saddr.s6_addr32[3]; /* Work only if this not encapsulated */ switch (fl6->flowi6_proto) { case IPPROTO_UDP: case IPPROTO_TCP: case IPPROTO_SCTP: - val ^= fl6->fl6_sport; - val ^= fl6->fl6_dport; + val ^= (__force u16)fl6->fl6_sport; + val ^= (__force u16)fl6->fl6_dport; break; case IPPROTO_ICMPV6: - val ^= fl6->fl6_icmp_type; - val ^= fl6->fl6_icmp_code; + val ^= (__force u16)fl6->fl6_icmp_type; + val ^= (__force u16)fl6->fl6_icmp_code; break; } /* RFC6438 recommands to use flowlabel */ - val ^= fl6->flowlabel; + val ^= (__force u32)fl6->flowlabel; /* Perhaps, we need to tune, this function? */ val = val ^ (val >> 7) ^ (val >> 12); -- cgit v1.1 From f3a1bfb11ccbc72d44f0b58c92115a40128979c3 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:50 +0000 Subject: rtnl/ipv6: use netconf msg to advertise forwarding status Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0424e4e..0c57a8f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_PRIVACY #include @@ -460,6 +461,72 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) return idev; } +static int inet6_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + if (type == NETCONFA_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv6_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET6; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + if (type == NETCONFA_FORWARDING && + nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, 
GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -486,6 +553,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + dev->ifindex, &idev->cnf); } @@ -518,6 +587,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) *p = newf; if (p == &net->ipv6.devconf_dflt->forwarding) { + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); rtnl_unlock(); return 0; } @@ -525,6 +598,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); rtnl_unlock(); -- cgit v1.1 From 76f8f6cb76b110aaace90b6208b1ceb46bd78b7f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:51 +0000 Subject: rtnl/ipv6: add support of RTM_GETNETCONF This message allows to get the devconf for an interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0c57a8f..8f0b12a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -466,7 +466,8 @@ static int inet6_netconf_msgsize_devconf(int type) int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + nla_total_size(4); /* NETCONFA_IFINDEX */ - if (type == NETCONFA_FORWARDING) + /* type -1 is used for ALL */ + if (type == -1 || type == NETCONFA_FORWARDING) size += nla_total_size(4); return size; @@ -491,7 +492,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; - if (type == NETCONFA_FORWARDING && + /* type -1 is used for ALL */ + if ((type == -1 || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; @@ -527,6 +529,73 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); } +static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, + [NETCONFA_FORWARDING] = { .len = sizeof(int) }, +}; + +static int inet6_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX+1]; + struct netconfmsg *ncm; + struct sk_buff *skb; + struct ipv6_devconf *devconf; + struct inet6_dev *in6_dev; + struct net_device *dev; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_ipv6_policy); + if (err < 0) + goto errout; + + err = EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + switch (ifindex) { + case NETCONFA_IFINDEX_ALL: + devconf = net->ipv6.devconf_all; + break; + case NETCONFA_IFINDEX_DEFAULT: + devconf = net->ipv6.devconf_dflt; + break; + 
default: + dev = __dev_get_by_index(net, ifindex); + if (dev == NULL) + goto errout; + in6_dev = __in6_dev_get(dev); + if (in6_dev == NULL) + goto errout; + devconf = &in6_dev->cnf; + break; + } + + err = -ENOBUFS; + skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + -1); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -4861,6 +4930,8 @@ int __init addrconf_init(void) inet6_dump_ifmcaddr, NULL); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, NULL); + __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, + NULL, NULL); ipv6_addr_label_rtnl_register(); -- cgit v1.1 From 6229b75d8da5a4eed7bb668de757e252986c2305 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 28 Oct 2012 14:40:44 +0000 Subject: netfilter: nf_nat: use PTR_RET Use PTR_RET rather than if(IS_ERR(...)) + PTR_ERR Generated by: coccinelle/api/ptr_ret.cocci Reported-by: Fengguang Wu Signed-off-by: Fengguang Wu Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6table_nat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index e418bd6..4c8219e 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -275,9 +275,7 @@ static int __net_init ip6table_nat_net_init(struct net *net) return -ENOMEM; net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_nat)) - return PTR_ERR(net->ipv6.ip6table_nat); - return 0; + return 
PTR_RET(net->ipv6.ip6table_nat); } static void __net_exit ip6table_nat_net_exit(struct net *net) -- cgit v1.1 From 07a936260a94ae4798527ce8f79d4f3b589ab8a3 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 29 Oct 2012 16:23:10 +0000 Subject: ipv6: use IS_ENABLED() #if defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE) can be replaced by #if IS_ENABLED(CONFIG_FOO) Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 20 ++++++++++---------- net/ipv6/ah6.c | 10 +++++----- net/ipv6/datagram.c | 2 +- net/ipv6/exthdrs.c | 18 +++++++++--------- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 5 ++--- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 6 ++---- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 ++-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6 +++--- net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 2 +- net/ipv6/raw.c | 6 +++--- net/ipv6/xfrm6_policy.c | 4 ++-- net/ipv6/xfrm6_state.c | 4 ++-- 15 files changed, 46 insertions(+), 49 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8f0b12a..387b813 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -402,7 +402,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) ndev->cnf.accept_dad = -1; -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { pr_info("%s: Disabled Multicast RS\n", dev->name); ndev->cnf.rtr_solicits = 0; @@ -1838,7 +1838,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, This thing is done here expecting that the whole class of non-broadcast devices need not cloning. 
*/ -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) cfg.fc_flags |= RTF_NONEXTHOP; #endif @@ -1898,7 +1898,7 @@ static void addrconf_add_mroute(struct net_device *dev) ip6_route_add(&cfg); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void sit_route_add(struct net_device *dev) { struct fib6_config cfg = { @@ -2250,7 +2250,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) if (dev == NULL) goto err_exit; -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT) { const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; @@ -2461,7 +2461,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, } } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void sit_add_v4_addrs(struct inet6_dev *idev) { struct in6_addr addr; @@ -2580,7 +2580,7 @@ static void addrconf_dev_config(struct net_device *dev) addrconf_add_linklocal(idev, &addr); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void addrconf_sit_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2617,7 +2617,7 @@ static void addrconf_sit_config(struct net_device *dev) } #endif -#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +#if IS_ENABLED(CONFIG_NET_IPGRE) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2747,12 +2747,12 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } switch (dev->type) { -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: addrconf_sit_config(dev); break; #endif -#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +#if 
IS_ENABLED(CONFIG_NET_IPGRE) case ARPHRD_IPGRE: addrconf_gre_config(dev); break; @@ -3340,7 +3340,7 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7e61395..ecc35b9 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -44,7 +44,7 @@ #define IPV6HDR_BASELEN 8 struct tmp_ext { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) struct in6_addr saddr; #endif struct in6_addr daddr; @@ -152,7 +152,7 @@ bad: return false; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) /** * ipv6_rearrange_destopt - rearrange IPv6 destination options header * @iph: IPv6 header @@ -320,7 +320,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err) memcpy(top_iph, iph_base, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(&top_iph->saddr, iph_ext, extlen); #else memcpy(&top_iph->daddr, iph_ext, extlen); @@ -385,7 +385,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(iph_base, top_iph, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(iph_ext, &top_iph->saddr, extlen); #else memcpy(iph_ext, &top_iph->daddr, extlen); @@ -434,7 +434,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph, iph_base, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(&top_iph->saddr, iph_ext, extlen); #else memcpy(&top_iph->daddr, iph_ext, extlen); diff --git 
a/net/ipv6/datagram.c b/net/ipv6/datagram.c index be2b67d6..93cbad2 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -769,7 +769,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); switch (rthdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index fa3d9c3..f005acc 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -43,7 +43,7 @@ #include #include #include -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif @@ -224,7 +224,7 @@ bad: Destination options header. *****************************/ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) { struct ipv6_destopt_hao *hao; @@ -288,7 +288,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) #endif static const struct tlvtype_proc tlvprocdestopt_lst[] = { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) { .type = IPV6_TLV_HAO, .func = ipv6_dest_hao, @@ -300,7 +300,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = { static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) __u16 dstbuf; #endif struct dst_entry *dst = skb_dst(skb); @@ -315,14 +315,14 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) } opt->lastopt = opt->dst1 = skb_network_header_len(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) dstbuf = opt->dst1; #endif if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->transport_header += 
(skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; @@ -378,7 +378,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was * processed by own @@ -404,7 +404,7 @@ looped_back: } switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (accept_source_route < 0) goto unknown_rh; @@ -461,7 +461,7 @@ looped_back: addr += i - 1; switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 24d69db..b4a9fd5 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -280,7 +280,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) static void mip6_addr_swap(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index aece3e7..e10c77b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -538,8 +538,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) to->nf_trace = from->nf_trace; #endif skb_copy_secmark(to, from); @@ -564,7 +563,7 
@@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) found_rhdr = 1; break; case NEXTHDR_DEST: -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ba6d13d..a7bee6a 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -397,7 +397,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d7cb045..10ce76a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -207,8 +207,7 @@ ip6t_get_target_c(const struct ip6t_entry *e) return ip6t_get_target((struct ip6t_entry *)e); } -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* This cries for unification! 
*/ static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", @@ -381,8 +380,7 @@ ip6t_do_table(struct sk_buff *skb, t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(skb, hook, in, out, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 8860d23..ccb5cbe 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -295,7 +295,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { }, }; -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include #include @@ -346,7 +346,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, .get_l4proto = ipv6_get_l4proto, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv6_tuple_to_nlattr, .nlattr_tuple_size = ipv6_nlattr_tuple_size, .nlattr_to_tuple = ipv6_nlattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 2d54b20..24df3dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include #include @@ -375,7 +375,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .get_timeouts = icmpv6_get_timeouts, 
.new = icmpv6_new, .error = icmpv6_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, .nlattr_tuple_size = icmpv6_nlattr_tuple_size, .nlattr_to_tuple = icmpv6_nlattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index cdd6d04..aacd121 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -19,7 +19,7 @@ #include #include -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include #include #include @@ -35,7 +35,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif @@ -60,7 +60,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, { struct sk_buff *reasm; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? 
*/ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c index 5d6da78..61aaf70 100644 --- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -84,7 +84,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { .manip_pkt = icmpv6_manip_pkt, .in_range = icmpv6_in_range, .unique_tuple = icmpv6_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d8e95c7..6cd29b1 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,7 +50,7 @@ #include #include #include -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif #include @@ -123,7 +123,7 @@ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) return 1; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); static mh_filter_t __rcu *mh_filter __read_mostly; @@ -184,7 +184,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) filtered = icmpv6_filter(sk, skb); break; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: { /* XXX: To validate MH only once for each packet, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f8c4c08..f3ed8ca 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,7 +20,7 @@ #include #include #include -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif @@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl6->flowi6_proto = nexthdr; return; 
-#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 3f2f7c4..d8c70b8 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -101,7 +101,7 @@ static int __xfrm6_state_sort_cmp(void *p) return 1; else return 3; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: return 2; @@ -134,7 +134,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p) switch (v->mode) { case XFRM_MODE_TRANSPORT: return 1; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: return 2; -- cgit v1.1 From 121d1e0941e05c64ee4223064dd83eb24e871739 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Oct 2012 01:08:49 +0000 Subject: netfilter: ipv6: add getsockopt to retrieve origdst userspace can query the original ipv4 destination address of a REDIRECTed connection via getsockopt(m_sock, SOL_IP, SO_ORIGINAL_DST, &m_server_addr, &addrsize) but for ipv6 no such option existed. This adds getsockopt(..., IPPROTO_IPV6, IP6T_SO_ORIGINAL_DST, ...). Without this, userspace needs to parse /proc or use ctnetlink, which appears to be overkill. This uses option number 80 for IP6T_SO_ORIGINAL_DST, which is spare, to use the same number we use in the IPv4 socket option SO_ORIGINAL_DST. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 61 ++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 8860d23..02dcafd 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -295,6 +296,50 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { }, }; +static int +ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *inet6 = inet6_sk(sk); + const struct nf_conntrack_tuple_hash *h; + struct sockaddr_in6 sin6; + struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; + struct nf_conn *ct; + + tuple.src.u3.in6 = inet6->rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.in6 = inet6->daddr; + tuple.dst.u.tcp.port = inet->inet_dport; + tuple.dst.protonum = sk->sk_protocol; + + if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) + return -ENOPROTOOPT; + + if (*len < 0 || (unsigned int) *len < sizeof(sin6)) + return -EINVAL; + + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + if (!h) { + pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", + &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; + } + + ct = nf_ct_tuplehash_to_ctrack(h); + + sin6.sin6_family = AF_INET6; + sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; + sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; + memcpy(&sin6.sin6_addr, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, + sizeof(sin6.sin6_addr)); + sin6.sin6_scope_id = sk->sk_bound_dev_if; + + nf_ct_put(ct); + return 
copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; +} + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include @@ -359,6 +404,14 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI "); +static struct nf_sockopt_ops so_getorigdst6 = { + .pf = NFPROTO_IPV6, + .get_optmin = IP6T_SO_ORIGINAL_DST, + .get_optmax = IP6T_SO_ORIGINAL_DST + 1, + .get = ipv6_getorigdst, + .owner = THIS_MODULE, +}; + static int ipv6_net_init(struct net *net) { int ret = 0; @@ -425,6 +478,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) need_conntrack(); nf_defrag_ipv6_enable(); + ret = nf_register_sockopt(&so_getorigdst6); + if (ret < 0) { + pr_err("Unable to register netfilter socket option\n"); + return ret; + } + ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) goto cleanup_pernet; @@ -440,6 +499,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) cleanup_ipv6: unregister_pernet_subsys(&ipv6_net_ops); cleanup_pernet: + nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -448,6 +508,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); + nf_unregister_sockopt(&so_getorigdst6); } module_init(nf_conntrack_l3proto_ipv6_init); -- cgit v1.1 From 1a72418bd7f0edcb85c817964efd370254fe749d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 1 Nov 2012 22:58:22 +0000 Subject: ipv6/multipath: remove flag NLM_F_EXCL after the first nexthop fib6_add_rt2node() will reject the nexthop if this flag is set, so we perform the check only for the first nexthop. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c42650c..9c7b5d8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2449,6 +2449,12 @@ beginning: goto beginning; } } + /* Because each route is added like a single route we remove + * this flag after the first nexthop (if there is a collision, + * we have already fail to add the first nexthop: + * fib6_add_rt2node() has reject it). + */ + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; rtnh = rtnh_next(rtnh, &remaining); } -- cgit v1.1 From e6c022a4fa2d2d9ca9d0a7ac3b05ad988f39fc30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Oct 2012 23:16:46 +0000 Subject: tcp: better retrans tracking for defer-accept For passive TCP connections using TCP_DEFER_ACCEPT facility, we incorrectly increment req->retrans each time timeout triggers while no SYNACK is sent. SYNACK are not sent for TCP_DEFER_ACCEPT that were established (for which we received the ACK from client). Only the last SYNACK is sent so that we can receive again an ACK from client, to move the req into accept queue. We plan to change this later to avoid the useless retransmit (and potential problem as this SYNACK could be lost) TCP_INFO later gives wrong information to user, claiming imaginary retransmits. Decouple req->retrans field into two independent fields : num_retrans : number of retransmit num_timeout : number of timeouts num_timeout is the counter that is incremented at each timeout, regardless of actual SYNACK being sent or not, and used to compute the exponential timeout. Introduce inet_rtx_syn_ack() helper to increment num_retrans only if ->rtx_syn_ack() succeeded. Use inet_rtx_syn_ack() from tcp_check_req() to increment num_retrans when we re-send a SYNACK in answer to a (retransmitted) SYN. Prior to this patch, we were not counting these retransmits. 
Change tcp_v[46]_rtx_synack() to increment TCP_MIB_RETRANSSEGS only if a synack packet was successfully queued. Reported-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Julian Anastasov Cc: Vijay Subramanian Cc: Elliott Hughes Cc: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 182ab9a..4016197 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq6->iif = inet6_iif(skb); req->expires = 0UL; - req->retrans = 0; + req->num_retrans = 0; ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb6782e..c73d0eb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -495,9 +495,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { struct flowi6 fl6; + int res; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + if (!res) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return res; } static void tcp_v6_reqsk_destructor(struct request_sock *req) @@ -1364,7 +1367,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); tcp_synack_rtt_meas(newsk, req); - newtp->total_retrans = req->retrans; + newtp->total_retrans = req->num_retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; @@ -1866,7 +1869,7 @@ static void get_openreq6(struct seq_file *seq, 0,0, /* could print option size, but that is af dependent. 
*/ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), - req->retrans, + req->num_timeout, from_kuid_munged(seq_user_ns(seq), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ -- cgit v1.1 From 1a9408355e91c21eaf4626386d65988a0ad7dc21 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sun, 28 Oct 2012 17:43:53 +0000 Subject: ipv6: remove a useless NULL check In dev_forward_change(), it is useless to check if idev->dev is NULL, it is always non-NULL here. Reported-by: Fengguang Wu Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 387b813..ced58e1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -607,7 +607,7 @@ static void dev_forward_change(struct inet6_dev *idev) dev = idev->dev; if (idev->cnf.forwarding) dev_disable_lro(dev); - if (dev && (dev->flags & IFF_MULTICAST)) { + if (dev->flags & IFF_MULTICAST) { if (idev->cnf.forwarding) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); else -- cgit v1.1 From 94e187c01512c9cf29e2ff54bf1a1b045f38293d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 29 Oct 2012 00:13:19 +0000 Subject: ipv6: introduce ip6_rt_put() As suggested by Eric, we could introduce a helper function for ipv6 too, to avoid checking if rt is NULL before dst_release(). Cc: Eric Dumazet Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 7 +++---- net/ipv6/anycast.c | 2 +- net/ipv6/fib6_rules.c | 2 +- net/ipv6/ip6_gre.c | 2 +- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/ip6_tunnel.c | 5 ++--- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 7 +++---- net/ipv6/netfilter/ip6t_rpfilter.c | 2 +- net/ipv6/route.c | 14 +++++++------- 10 files changed, 23 insertions(+), 26 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ced58e1..fab23db 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -699,7 +699,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warn("Freeing alive inet6 address %p\n", ifp); return; } - dst_release(&ifp->rt->dst); + ip6_rt_put(ifp->rt); kfree_rcu(ifp, rcu); } @@ -951,7 +951,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) rt6_set_expires(rt, expires); } } - dst_release(&rt->dst); + ip6_rt_put(rt); } /* clean up prefsrc entries */ @@ -2027,8 +2027,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, dev, expires, flags); } - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); } /* Try to figure out our local address for this prefix */ diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index cdf02be..4963c76 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index d9fb911..2e1a432 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -100,7 +100,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } again: - dst_release(&rt->dst); + ip6_rt_put(rt); rt = NULL; goto out; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 
0185679..bbe2e7b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1069,7 +1069,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) dev->mtu = IPV6_MIN_MTU; } } - dst_release(&rt->dst); + ip6_rt_put(rt); } t->hlen = addend; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e10c77b..3deaa4e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -755,7 +755,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->dst); + ip6_rt_put(rt); return 0; } @@ -767,7 +767,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->dst); + ip6_rt_put(rt); return err; slow_path_clean: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cb7e2de..09482f7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -663,8 +663,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_send(skb2, rel_type, rel_code, rel_info); - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); kfree_skb(skb2); } @@ -1208,7 +1207,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dst_release(&rt->dst); + ip6_rt_put(rt); } } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 92f8e48..b19ed51 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -163,7 +163,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } } else dev = dev_get_by_index_rcu(net, ifindex); @@ -260,7 +260,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } } else dev = dev_get_by_index_rcu(net, ifindex); diff --git 
a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ff36194..ae0cf81 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1145,7 +1145,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - dst_release(&rt->dst); + ip6_rt_put(rt); return; } } @@ -1170,7 +1170,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - dst_release(&rt->dst); + ip6_rt_put(rt); return; } neigh->flags |= NTF_ROUTER; @@ -1326,8 +1326,7 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); if (neigh) neigh_release(neigh); } diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5d1d8b0..5060d54 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -67,7 +67,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) ret = true; out: - dst_release(&rt->dst); + ip6_rt_put(rt); return ret; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c7b5d8..c1cfcb7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -732,7 +732,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, else rt6_set_expires(rt, jiffies + HZ * lifetime); - dst_release(&rt->dst); + ip6_rt_put(rt); } return 0; } @@ -948,7 +948,7 @@ restart: else goto out2; - dst_release(&rt->dst); + ip6_rt_put(rt); rt = nrt ? : net->ipv6.ip6_null_entry; dst_hold(&rt->dst); @@ -965,7 +965,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. 
*/ - dst_release(&rt->dst); + ip6_rt_put(rt); goto relookup; out: @@ -1576,7 +1576,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->dst.dev) { - dst_release(&grt->dst); + ip6_rt_put(grt); goto out; } } else { @@ -1587,7 +1587,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags & RTF_GATEWAY)) err = 0; - dst_release(&grt->dst); + ip6_rt_put(grt); if (err) goto out; @@ -1673,7 +1673,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_unlock_bh(&table->tb6_lock); out: - dst_release(&rt->dst); + ip6_rt_put(rt); return err; } @@ -2732,7 +2732,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { - dst_release(&rt->dst); + ip6_rt_put(rt); err = -ENOBUFS; goto errout; } -- cgit v1.1 From b20b6d972624ff024023012e38a067cb5086270e Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 7 Nov 2012 05:05:38 +0000 Subject: ndisc: fix a typo in a comment in ndisc_recv_na() Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/ndisc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ae0cf81..a2e50dc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -906,7 +906,7 @@ static void ndisc_recv_na(struct sk_buff *skb) if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { - /* XXX: idev->cnf.prixy_ndp */ + /* XXX: idev->cnf.proxy_ndp */ goto out; } -- cgit v1.1 From a4477c4ddb5d3552b4d204f49047bdbb097c4450 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 7 Nov 2012 21:56:33 +0000 Subject: ipv6: remove rt6i_peer_genid from rt6_info and its handler 6431cbc25f (Create a mechanism for upward inetpeer propagation into routes) introduced this code, but the mechanism is never enabled, since rt6i_peer_genid is always zero whether it is left unassigned or assigned by rt6_peer_genid(). After 5943634fc5 (ipv4: Maintain redirect and PMTU info in struct rtable again), the ipv4-related code of this mechanism has been removed; I think we may be able to remove the ipv6 part now. Signed-off-by: Li RongQing Signed-off-by: David S.
Miller --- net/ipv6/route.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c1cfcb7..6863f8b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -321,13 +321,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); - -static u32 rt6_peer_genid(void) -{ - return atomic_read(&__rt6_peer_genid); -} - void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer_base *base; @@ -341,8 +334,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create) if (peer) { if (!rt6_set_peer(rt, peer)) inet_putpeer(peer); - else - rt->rt6i_peer_genid = rt6_peer_genid(); } } @@ -1099,14 +1090,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { - if (rt->rt6i_peer_genid != rt6_peer_genid()) { - if (!rt6_has_peer(rt)) - rt6_bind_peer(rt, 0); - rt->rt6i_peer_genid = rt6_peer_genid(); - } + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; - } + return NULL; } -- cgit v1.1 From ba3e3f50a0e5de76fc0684d856394931f2bc39fa Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 9 Nov 2012 06:10:00 +0000 Subject: sit: advertise tunnel param via rtnl It is useful for daemons that monitor link events to have the full parameters of these interfaces when a rtnl message is sent. It also allows them to be dumped via rtnetlink. It is based on what is done for GRE tunnels. Signed-off-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- net/ipv6/sit.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3ed54ff..b543c56 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -68,6 +68,7 @@ static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; struct sit_net { @@ -282,6 +283,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, goto failed_free; strcpy(nt->parms.name, dev->name); + dev->rtnl_link_ops = &sit_link_ops; dev_hold(dev); @@ -1216,6 +1218,47 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static size_t sit_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(4) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(4) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_TOS */ + nla_total_size(1) + + 0; +} + +static int sit_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops sit_link_ops __read_mostly = { + .kind = "sit", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip_tunnel), + .get_size = sit_get_size, + .fill_info = sit_fill_info, +}; + static struct xfrm_tunnel sit_handler __read_mostly = { .handler = ipip6_rcv, 
.err_handler = ipip6_err, @@ -1302,6 +1345,7 @@ static struct pernet_operations sit_net_ops = { static void __exit sit_cleanup(void) { + rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); unregister_pernet_device(&sit_net_ops); @@ -1319,10 +1363,21 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - unregister_pernet_device(&sit_net_ops); pr_info("%s: can't add protocol\n", __func__); + goto xfrm_tunnel_failed; } + err = rtnl_link_register(&sit_link_ops); + if (err < 0) + goto rtnl_link_failed; + +out: return err; + +rtnl_link_failed: + xfrm4_tunnel_deregister(&sit_handler, AF_INET6); +xfrm_tunnel_failed: + unregister_pernet_device(&sit_net_ops); + goto out; } module_init(sit_init); -- cgit v1.1 From c075b13098b399dc565b4d53f42047a8d40ed3ba Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 9 Nov 2012 06:10:01 +0000 Subject: ip6tnl: advertise tunnel param via rtnl It is useful for daemons that monitor link events to have the full parameters of these interfaces when a rtnl message is sent. It also allows them to be dumped via rtnetlink. It is based on what is done for GRE tunnels. Signed-off-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- net/ipv6/ip6_tunnel.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 09482f7..424ed45 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -83,6 +83,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); +static struct rtnl_link_ops ip6_link_ops __read_mostly; static int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { @@ -299,6 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) goto failed_free; strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); @@ -1504,6 +1506,55 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } +static size_t ip6_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_IPTUN_FLOWINFO */ + nla_total_size(4) + + /* IFLA_IPTUN_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), + &parm->raddr) || + nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), + &parm->laddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || + nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || + nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || + nla_put_u32(skb, 
IFLA_IPTUN_FLAGS, parm->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ip6_link_ops __read_mostly = { + .kind = "ip6tnl", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip6_tnl), + .get_size = ip6_get_size, + .fill_info = ip6_fill_info, +}; + static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { .handler = ip4ip6_rcv, .err_handler = ip4ip6_err, @@ -1612,9 +1663,14 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + err = rtnl_link_register(&ip6_link_ops); + if (err < 0) + goto rtnl_link_failed; return 0; +rtnl_link_failed: + xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); out_ip4ip6: @@ -1629,6 +1685,7 @@ out_pernet: static void __exit ip6_tunnel_cleanup(void) { + rtnl_link_unregister(&ip6_link_ops); if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) pr_info("%s: can't deregister ip4ip6\n", __func__); -- cgit v1.1 From f8f626754ebeca613cf1af2e6f890cfde0e74d5b Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 9 Nov 2012 17:05:07 -0800 Subject: ipv6: Move ipv6_find_hdr() out of Netfilter code. Open vSwitch will soon also use ipv6_find_hdr() so this moves it out of Netfilter-specific code into a more common location. 
Signed-off-by: Jesse Gross --- net/ipv6/exthdrs_core.c | 103 ++++++++++++++++++++++++++++++++++++++++ net/ipv6/netfilter/ip6_tables.c | 103 ---------------------------------------- 2 files changed, 103 insertions(+), 103 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index f73d59a..8ea253a 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -111,3 +111,106 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, return start; } EXPORT_SYMBOL(ipv6_skip_exthdr); + +/* + * find the offset to specified header or the protocol number of last header + * if target < 0. "last header" is transport protocol header, ESP, or + * "No next header". + * + * Note that *offset is used as input/output parameter. an if it is not zero, + * then it must be a valid offset to an inner IPv6 header. This can be used + * to explore inner IPv6 header, eg. ICMPv6 error messages. + * + * If target header is found, its offset is set in *offset and return protocol + * number. Otherwise, return -1. + * + * If the first fragment doesn't contain the final protocol header or + * NEXTHDR_NONE it is considered invalid. + * + * Note that non-1st fragment is special case that "the protocol number + * of last header" is "next header" field in Fragment header. In this case, + * *offset is meaningless and fragment offset is stored in *fragoff if fragoff + * isn't NULL. + * + * if flags is not NULL and it's a fragment, then the frag flag IP6_FH_F_FRAG + * will be set. If it's an AH header, the IP6_FH_F_AUTH flag is set and + * target < 0, then this function will stop at the AH header. 
+ */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + int target, unsigned short *fragoff, int *flags) +{ + unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); + u8 nexthdr = ipv6_hdr(skb)->nexthdr; + unsigned int len; + + if (fragoff) + *fragoff = 0; + + if (*offset) { + struct ipv6hdr _ip6, *ip6; + + ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); + if (!ip6 || (ip6->version != 6)) { + printk(KERN_ERR "IPv6 header not found\n"); + return -EBADMSG; + } + start = *offset + sizeof(struct ipv6hdr); + nexthdr = ip6->nexthdr; + } + len = skb->len - start; + + while (nexthdr != target) { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { + if (target < 0) + break; + return -ENOENT; + } + + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -EBADMSG; + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off; + __be16 *fp; + + if (flags) /* Indicate that this is a fragment */ + *flags |= IP6_FH_F_FRAG; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -EBADMSG; + + _frag_off = ntohs(*fp) & ~0x7; + if (_frag_off) { + if (target < 0 && + ((!ipv6_ext_hdr(hp->nexthdr)) || + hp->nexthdr == NEXTHDR_NONE)) { + if (fragoff) + *fragoff = _frag_off; + return hp->nexthdr; + } + return -ENOENT; + } + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) { + if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0)) + break; + hdrlen = (hp->hdrlen + 2) << 2; + } else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *offset = start; + return nexthdr; +} +EXPORT_SYMBOL(ipv6_find_hdr); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d7cb045..1ce4f15 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2273,112 +2273,9 @@ static 
void __exit ip6_tables_fini(void) unregister_pernet_subsys(&ip6_tables_net_ops); } -/* - * find the offset to specified header or the protocol number of last header - * if target < 0. "last header" is transport protocol header, ESP, or - * "No next header". - * - * Note that *offset is used as input/output parameter. an if it is not zero, - * then it must be a valid offset to an inner IPv6 header. This can be used - * to explore inner IPv6 header, eg. ICMPv6 error messages. - * - * If target header is found, its offset is set in *offset and return protocol - * number. Otherwise, return -1. - * - * If the first fragment doesn't contain the final protocol header or - * NEXTHDR_NONE it is considered invalid. - * - * Note that non-1st fragment is special case that "the protocol number - * of last header" is "next header" field in Fragment header. In this case, - * *offset is meaningless and fragment offset is stored in *fragoff if fragoff - * isn't NULL. - * - * if flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG - * will be set. If it's an AH header, the IP6T_FH_F_AUTH flag is set and - * target < 0, then this function will stop at the AH header. 
- */ -int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, - int target, unsigned short *fragoff, int *flags) -{ - unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); - u8 nexthdr = ipv6_hdr(skb)->nexthdr; - unsigned int len; - - if (fragoff) - *fragoff = 0; - - if (*offset) { - struct ipv6hdr _ip6, *ip6; - - ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); - if (!ip6 || (ip6->version != 6)) { - printk(KERN_ERR "IPv6 header not found\n"); - return -EBADMSG; - } - start = *offset + sizeof(struct ipv6hdr); - nexthdr = ip6->nexthdr; - } - len = skb->len - start; - - while (nexthdr != target) { - struct ipv6_opt_hdr _hdr, *hp; - unsigned int hdrlen; - - if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) - break; - return -ENOENT; - } - - hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) - return -EBADMSG; - if (nexthdr == NEXTHDR_FRAGMENT) { - unsigned short _frag_off; - __be16 *fp; - - if (flags) /* Indicate that this is a fragment */ - *flags |= IP6T_FH_F_FRAG; - fp = skb_header_pointer(skb, - start+offsetof(struct frag_hdr, - frag_off), - sizeof(_frag_off), - &_frag_off); - if (fp == NULL) - return -EBADMSG; - - _frag_off = ntohs(*fp) & ~0x7; - if (_frag_off) { - if (target < 0 && - ((!ipv6_ext_hdr(hp->nexthdr)) || - hp->nexthdr == NEXTHDR_NONE)) { - if (fragoff) - *fragoff = _frag_off; - return hp->nexthdr; - } - return -ENOENT; - } - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) { - if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0)) - break; - hdrlen = (hp->hdrlen + 2) << 2; - } else - hdrlen = ipv6_optlen(hp); - - nexthdr = hp->nexthdr; - len -= hdrlen; - start += hdrlen; - } - - *offset = start; - return nexthdr; -} - EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); -EXPORT_SYMBOL(ipv6_find_hdr); module_init(ip6_tables_init); module_exit(ip6_tables_fini); -- cgit v1.1 From 
9fafd65ad407d4e0c96919a325f568dd95d032af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?YOSHIFUJI=20Hideaki=20/=20=E5=90=89=E8=97=A4=E8=8B=B1?= =?UTF-8?q?=E6=98=8E?= Date: Mon, 12 Nov 2012 07:50:17 +0000 Subject: ipv6 ndisc: Use pre-defined in6addr_linklocal_allnodes. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4f47aa5..6ba4b54 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -535,7 +535,6 @@ static void ndisc_send_unsol_na(struct net_device *dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; - struct in6_addr mcaddr = IN6ADDR_LINKLOCAL_ALLNODES_INIT; idev = in6_dev_get(dev); if (!idev) @@ -543,7 +542,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr, + ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); -- cgit v1.1 From 9195bb8e381d81d5a315f911904cdf0cfcc919b8 Mon Sep 17 00:00:00 2001 From: Ansis Atteka Date: Fri, 9 Nov 2012 17:11:31 -0800 Subject: ipv6: improve ipv6_find_hdr() to skip empty routing headers This patch prepares the ipv6_find_hdr() function so that it is able to skip routing headers where segments_left is 0. This is required to handle the multiple routing header case correctly when changing IPv6 addresses.
Signed-off-by: Ansis Atteka Signed-off-by: Jesse Gross --- net/ipv6/exthdrs_core.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 8ea253a..11b4e29 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -132,9 +132,11 @@ EXPORT_SYMBOL(ipv6_skip_exthdr); * *offset is meaningless and fragment offset is stored in *fragoff if fragoff * isn't NULL. * - * if flags is not NULL and it's a fragment, then the frag flag IP6_FH_F_FRAG - * will be set. If it's an AH header, the IP6_FH_F_AUTH flag is set and - * target < 0, then this function will stop at the AH header. + * if flags is not NULL and it's a fragment, then the frag flag + * IP6_FH_F_FRAG will be set. If it's an AH header, the + * IP6_FH_F_AUTH flag is set and target < 0, then this function will + * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this + * function will skip all those routing headers, where segements_left was 0. 
*/ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *flags) @@ -142,6 +144,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; unsigned int len; + bool found; if (fragoff) *fragoff = 0; @@ -159,9 +162,10 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, } len = skb->len - start; - while (nexthdr != target) { + do { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; + found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { if (target < 0) @@ -172,6 +176,20 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (hp == NULL) return -EBADMSG; + + if (nexthdr == NEXTHDR_ROUTING) { + struct ipv6_rt_hdr _rh, *rh; + + rh = skb_header_pointer(skb, start, sizeof(_rh), + &_rh); + if (rh == NULL) + return -EBADMSG; + + if (flags && (*flags & IP6_FH_F_SKIP_RH) && + rh->segments_left == 0) + found = false; + } + if (nexthdr == NEXTHDR_FRAGMENT) { unsigned short _frag_off; __be16 *fp; @@ -205,10 +223,12 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, } else hdrlen = ipv6_optlen(hp); - nexthdr = hp->nexthdr; - len -= hdrlen; - start += hdrlen; - } + if (!found) { + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + } while (!found); *offset = start; return nexthdr; -- cgit v1.1 From d3976a53ce1f4763cb910d047e8763e4c696e5f7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Nov 2012 00:17:24 +0000 Subject: netfilter: ipv6: only provide sk_bound_dev_if for link-local addr yoshfuji points out that sk_bound_dev_if should only be provided for link-local addresses. IPv6 getpeer/sockname also has this test, i.e. we will now only set sin6_scope_id if the original(!) destination was a link-local address. 
Reported-by: YOSHIFUJI Hideaki Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 02dcafd..e5f6cf7 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -334,9 +334,14 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) memcpy(&sin6.sin6_addr, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, sizeof(sin6.sin6_addr)); - sin6.sin6_scope_id = sk->sk_bound_dev_if; nf_ct_put(ct); + + if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6.sin6_scope_id = sk->sk_bound_dev_if; + else + sin6.sin6_scope_id = 0; + return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; } -- cgit v1.1 From 5cb04436eef62aa8f5c482f8ec8deba391dea465 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 6 Nov 2012 16:46:20 +0000 Subject: ipv6: add knob to send unsolicited ND on link-layer address change This patch introduces a new knob ndisc_notify. If enabled, the kernel will transmit an unsolicited neighbour advertisement on link-layer address change to update the neighbour tables of the corresponding hosts more quickly. This is the equivalent to arp_notify in ipv4 world. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 8 ++++++++ net/ipv6/ndisc.c | 7 +++++++ 2 files changed, 15 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fab23db..cb803b7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4037,6 +4037,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; + array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; } static inline size_t inet6_ifla6_size(void) @@ -4705,6 +4706,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec }, { + .procname = "ndisc_notify", + .data = &ipv6_devconf.ndisc_notify, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { /* sentinel */ } }, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6ba4b54..f41853b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1572,11 +1572,18 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct inet6_dev *idev; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(~0UL, net); + idev = in6_dev_get(dev); + if (!idev) + break; + if (idev->cnf.ndisc_notify) + ndisc_send_unsol_na(dev); + in6_dev_put(idev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); -- cgit v1.1 From aa0010f880ab542da3ad0e72992f2dc518ac68a0 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sun, 11 Nov 2012 21:52:33 +0000 Subject: net: convert __IPTUNNEL_XMIT() to an inline function __IPTUNNEL_XMIT() is an ugly macro, convert it to a static inline function, so make it more readable. IPTUNNEL_XMIT() is unused, just remove it. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- net/ipv6/ip6_gre.c | 9 --------- net/ipv6/ip6_tunnel.c | 8 -------- net/ipv6/sit.c | 14 +------------- 3 files changed, 1 insertion(+), 30 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 12aa473..672101d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -116,15 +116,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr) #define for_each_ip_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) -/* often modified stats are per cpu, other are shared (netdev->stats) */ -struct pcpu_tstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 424ed45..8db4d9b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -95,14 +95,6 @@ struct ip6_tnl_net { struct ip6_tnl __rcu **tnls[2]; }; -/* often modified stats are per cpu, other are shared (netdev->stats) */ -struct pcpu_tstats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; -} __attribute__((aligned(4*sizeof(unsigned long)))); - static struct net_device_stats *ip6_get_stats(struct net_device *dev) { struct pcpu_tstats sum = { 0 }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b543c56..ffe83ef 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -88,15 +88,6 @@ struct sit_net { #define for_each_ip_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) -/* often modified stats are per cpu, other are shared (netdev->stats) */ -struct pcpu_tstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) { @@ -685,7 +676,6 @@ static netdev_tx_t 
ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct pcpu_tstats *tstats; const struct iphdr *tiph = &tunnel->parms.iph; const struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; @@ -866,9 +856,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; - nf_reset(skb); - tstats = this_cpu_ptr(dev->tstats); - __IPTUNNEL_XMIT(tstats, &dev->stats); + iptunnel_xmit(skb, dev); return NETDEV_TX_OK; tx_error_icmp: -- cgit v1.1 From e086cadc08e259150b2ab8f7f4a16dbf9e3c2f22 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sun, 11 Nov 2012 21:52:34 +0000 Subject: net: unify for_each_ip_tunnel_rcu() The definitions of for_each_ip_tunnel_rcu() are the same, so unify them. Also, don't hide the parameter 't'. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 14 ++++---------- net/ipv6/sit.c | 13 +++---------- 2 files changed, 7 insertions(+), 20 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 672101d..823fd64 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -109,12 +109,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr) #define tunnels_r tunnels[2] #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] -/* - * Locking : hash tables are protected by RCU and RTNL - */ - -#define for_each_ip_tunnel_rcu(start) \ - for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) @@ -172,7 +166,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, ARPHRD_ETHER : ARPHRD_IP6GRE; int score, cand_score = 4; - for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { + for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_equal(remote, &t->parms.raddr) ||
key != t->parms.i_key || @@ -197,7 +191,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, } } - for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { + for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { if (!ipv6_addr_equal(remote, &t->parms.raddr) || key != t->parms.i_key || !(t->dev->flags & IFF_UP)) @@ -221,7 +215,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, } } - for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { + for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { if ((!ipv6_addr_equal(local, &t->parms.laddr) && (!ipv6_addr_equal(local, &t->parms.raddr) || !ipv6_addr_is_multicast(local))) || @@ -247,7 +241,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, } } - for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { + for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { if (t->parms.i_key != key || !(t->dev->flags & IFF_UP)) continue; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ffe83ef..5bce2f6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -81,13 +81,6 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; -/* - * Locking : hash tables are protected by RCU and RTNL - */ - -#define for_each_ip_tunnel_rcu(start) \ - for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) - static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *tot) { @@ -133,20 +126,20 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); - for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) { + for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) { + for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && (!dev || !t->parms.link || dev->iflink == 
t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) { + for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) -- cgit v1.1 From c75ea260400aaea8100caa012a0b1958ca094840 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 11 Nov 2012 20:16:08 +0000 Subject: ipv6: remove obsolete comments in route.c Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3045872..11249d2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1371,12 +1371,6 @@ out: return entries > rt_max_size; } -/* Clean host part of a prefix. Not necessary in radix tree, - but results in cleaner routing tables. - - Remove it only when all the things will work! - */ - int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); -- cgit v1.1 From cfa323b6b98f44ddf46cc987f74a23dcab697134 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:13:58 +0000 Subject: ip6tnl/rtnl: add IFLA_IPTUN_PROTO on dump IPv6 tunnels can have three modes: 4in6, 6in6 and xin6. This information was missing in the netlink message. Signed-off-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- net/ipv6/ip6_tunnel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8db4d9b..929ba0b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1515,6 +1515,8 @@ static size_t ip6_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_IPTUN_FLAGS */ nla_total_size(4) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + 0; } @@ -1531,7 +1533,8 @@ static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || - nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags)) + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; return 0; -- cgit v1.1 From b58d731acc599b69fc50cb40e13f30f0f16fcb3f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:13:59 +0000 Subject: ip6tnl: rename rtnl functions for consistency Functions in this file start with ip6_tnl_. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 929ba0b..c054847 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1498,7 +1498,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } -static size_t ip6_get_size(const struct net_device *dev) +static size_t ip6_tnl_get_size(const struct net_device *dev) { return /* IFLA_IPTUN_LINK */ @@ -1520,7 +1520,7 @@ static size_t ip6_get_size(const struct net_device *dev) 0; } -static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) +static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *tunnel = netdev_priv(dev); struct __ip6_tnl_parm *parm = &tunnel->parms; @@ -1546,8 +1546,8 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .kind = "ip6tnl", .maxtype = IFLA_IPTUN_MAX, .priv_size = sizeof(struct ip6_tnl), - .get_size = ip6_get_size, - .fill_info = ip6_fill_info, + .get_size = ip6_tnl_get_size, + .fill_info = ip6_tnl_fill_info, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { -- cgit v1.1 From 0b112457229d8a17198a02f3cca32922d2e374f1 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:14:00 +0000 Subject: ip6tnl: add support of link creation via rtnl This patch adds support for 'ip link .. type ip6tnl'. Signed-off-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- net/ipv6/ip6_tunnel.c | 164 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 149 insertions(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c054847..ab4d056 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -251,6 +251,33 @@ static void ip6_dev_free(struct net_device *dev) free_netdev(dev); } +static int ip6_tnl_create2(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + int err; + + t = netdev_priv(dev); + err = ip6_tnl_dev_init(dev); + if (err < 0) + goto out; + + err = register_netdevice(dev); + if (err < 0) + goto out; + + strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &ip6_link_ops; + + dev_hold(dev); + ip6_tnl_link(ip6n, t); + return 0; + +out: + return err; +} + /** * ip6_tnl_create - create a new tunnel * @p: tunnel parameters @@ -269,7 +296,6 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) struct ip6_tnl *t; char name[IFNAMSIZ]; int err; - struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (p->name[0]) strlcpy(name, p->name, IFNAMSIZ); @@ -284,18 +310,10 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) t = netdev_priv(dev); t->parms = *p; - err = ip6_tnl_dev_init(dev); + err = ip6_tnl_create2(dev); if (err < 0) goto failed_free; - if ((err = register_netdevice(dev)) < 0) - goto failed_free; - - strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ip6_link_ops; - - dev_hold(dev); - ip6_tnl_link(ip6n, t); return t; failed_free: @@ -1230,6 +1248,20 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) return 0; } +static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + struct net *net = dev_net(t->dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + int err; + + ip6_tnl_unlink(ip6n, t); + 
synchronize_net(); + err = ip6_tnl_change(t, p); + ip6_tnl_link(ip6n, t); + netdev_state_change(t->dev); + return err; +} + static void ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) { @@ -1338,11 +1370,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } else t = netdev_priv(dev); - ip6_tnl_unlink(ip6n, t); - synchronize_net(); - err = ip6_tnl_change(t, &p1); - ip6_tnl_link(ip6n, t); - netdev_state_change(dev); + err = ip6_tnl_update(t, &p1); } if (t) { err = 0; @@ -1498,6 +1526,96 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } +static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + u8 proto; + + if (!data) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} + +static void ip6_tnl_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_IPTUN_LINK]) + parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); + + if (data[IFLA_IPTUN_LOCAL]) + nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL], + sizeof(struct in6_addr)); + + if (data[IFLA_IPTUN_REMOTE]) + nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE], + sizeof(struct in6_addr)); + + if (data[IFLA_IPTUN_TTL]) + parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); + + if (data[IFLA_IPTUN_ENCAP_LIMIT]) + parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]); + + if (data[IFLA_IPTUN_FLOWINFO]) + parms->flowinfo = nla_get_u32(data[IFLA_IPTUN_FLOWINFO]); + + if (data[IFLA_IPTUN_FLAGS]) + parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); +} + +static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *nt; + + nt 
= netdev_priv(dev); + ip6_tnl_netlink_parms(data, &nt->parms); + + if (ip6_tnl_locate(net, &nt->parms, 0)) + return -EEXIST; + + return ip6_tnl_create2(dev); +} + +static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip6_tnl *t; + struct __ip6_tnl_parm p; + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + if (dev == ip6n->fb_tnl_dev) + return -EINVAL; + + ip6_tnl_netlink_parms(data, &p); + + t = ip6_tnl_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else + t = netdev_priv(dev); + + return ip6_tnl_update(t, &p); +} + static size_t ip6_tnl_get_size(const struct net_device *dev) { return @@ -1542,10 +1660,26 @@ nla_put_failure: return -EMSGSIZE; } +static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { + [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, + [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, + [IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 }, + [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, + [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, + [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, +}; + static struct rtnl_link_ops ip6_link_ops __read_mostly = { .kind = "ip6tnl", .maxtype = IFLA_IPTUN_MAX, + .policy = ip6_tnl_policy, .priv_size = sizeof(struct ip6_tnl), + .setup = ip6_tnl_dev_setup, + .validate = ip6_tnl_validate, + .newlink = ip6_tnl_newlink, + .changelink = ip6_tnl_changelink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, }; -- cgit v1.1 From f9cd5a5536cacc254cd6bd1d8b736a02726ab24a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:14:04 +0000 Subject: sit: always notify change when params are updated netdev_state_change() was called only when end points or link was updated. Now that all parameters are advertised via netlink, we must advertise any change. 
This patch also prepares support for managing sit tunnels via rtnl. The code which updates tunnels will be put in a new function. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5bce2f6..cd6a2b2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -978,28 +978,26 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; } t = netdev_priv(dev); - ipip6_tunnel_unlink(sitn, t); - synchronize_net(); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - ipip6_tunnel_link(sitn, t); - netdev_state_change(dev); } + + ipip6_tunnel_unlink(sitn, t); + synchronize_net(); + t->parms.iph.saddr = p.iph.saddr; + t->parms.iph.daddr = p.iph.daddr; + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + ipip6_tunnel_link(sitn, t); + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + if (t->parms.link != p.link) { + t->parms.link = p.link; + ipip6_tunnel_bind_dev(dev); + } + netdev_state_change(dev); } if (t) { err = 0; - if (cmd == SIOCCHGTUNNEL) { - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - if (t->parms.link != p.link) { - t->parms.link = p.link; - ipip6_tunnel_bind_dev(dev); - netdev_state_change(dev); - } - } if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) err = -EFAULT; } else -- cgit v1.1 From a12c9a85813e927e1143989876d70697eb85aac9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:14:05 +0000 Subject: sit/rtnl: add missing parameters on dump IFLA_IPTUN_FLAGS and IFLA_IPTUN_PMTUDISC were missing. There is only one possible flag in i_flags: SIT_ISATAP. Signed-off-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- net/ipv6/sit.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cd6a2b2..69e5e73 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1210,6 +1210,10 @@ static size_t sit_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_TOS */ nla_total_size(1) + + /* IFLA_IPTUN_PMTUDISC */ + nla_total_size(1) + + /* IFLA_IPTUN_FLAGS */ + nla_total_size(2) + 0; } @@ -1222,7 +1226,10 @@ static int sit_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || - nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || + nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, + !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; return 0; -- cgit v1.1 From e4c94a9cdc5041cddaf1218397e4576a2d534f0f Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:14:06 +0000 Subject: sit: rename rtnl functions for consistency Functions in this file start with ipip6_. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 69e5e73..17442de 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1197,7 +1197,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } -static size_t sit_get_size(const struct net_device *dev) +static size_t ipip6_get_size(const struct net_device *dev) { return /* IFLA_IPTUN_LINK */ @@ -1217,7 +1217,7 @@ static size_t sit_get_size(const struct net_device *dev) 0; } -static int sit_fill_info(struct sk_buff *skb, const struct net_device *dev) +static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_parm *parm = &tunnel->parms; @@ -1241,8 +1241,8 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .kind = "sit", .maxtype = IFLA_IPTUN_MAX, .priv_size = sizeof(struct ip_tunnel), - .get_size = sit_get_size, - .fill_info = sit_fill_info, + .get_size = ipip6_get_size, + .fill_info = ipip6_fill_info, }; static struct xfrm_tunnel sit_handler __read_mostly = { -- cgit v1.1 From f37234160233561f2a2e3332272ae5b3725b620b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:14:07 +0000 Subject: sit: add support of link creation via rtnl This patch adds support for 'ip link .. type sit'. Signed-off-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- net/ipv6/sit.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 150 insertions(+), 28 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 17442de..7bd2a06 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -216,6 +216,37 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #endif } +static int ipip6_tunnel_create(struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = dev_net(dev); + struct sit_net *sitn = net_generic(net, sit_net_id); + int err; + + err = ipip6_tunnel_init(dev); + if (err < 0) + goto out; + ipip6_tunnel_clone_6rd(dev, sitn); + + if (t->parms.i_flags & SIT_ISATAP) + dev->priv_flags |= IFF_ISATAP; + + err = register_netdevice(dev); + if (err < 0) + goto out; + + strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &sit_link_ops; + + dev_hold(dev); + + ipip6_tunnel_link(sitn, t); + return 0; + +out: + return err; +} + static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, struct ip_tunnel_parm *parms, int create) { @@ -256,22 +287,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, nt = netdev_priv(dev); nt->parms = *parms; - if (ipip6_tunnel_init(dev) < 0) - goto failed_free; - ipip6_tunnel_clone_6rd(dev, sitn); - - if (parms->i_flags & SIT_ISATAP) - dev->priv_flags |= IFF_ISATAP; - - if (register_netdevice(dev) < 0) + if (ipip6_tunnel_create(dev) < 0) goto failed_free; - strcpy(nt->parms.name, dev->name); - dev->rtnl_link_ops = &sit_link_ops; - - dev_hold(dev); - - ipip6_tunnel_link(sitn, nt); return nt; failed_free: @@ -897,6 +915,27 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) dev->iflink = tunnel->parms.link; } +static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) +{ + struct net *net = dev_net(t->dev); + struct sit_net *sitn = net_generic(net, sit_net_id); + + ipip6_tunnel_unlink(sitn, t); + synchronize_net(); + t->parms.iph.saddr = 
p->iph.saddr; + t->parms.iph.daddr = p->iph.daddr; + memcpy(t->dev->dev_addr, &p->iph.saddr, 4); + memcpy(t->dev->broadcast, &p->iph.daddr, 4); + ipip6_tunnel_link(sitn, t); + t->parms.iph.ttl = p->iph.ttl; + t->parms.iph.tos = p->iph.tos; + if (t->parms.link != p->link) { + t->parms.link = p->link; + ipip6_tunnel_bind_dev(t->dev); + } + netdev_state_change(t->dev); +} + static int ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -980,20 +1019,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = netdev_priv(dev); } - ipip6_tunnel_unlink(sitn, t); - synchronize_net(); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - ipip6_tunnel_link(sitn, t); - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - if (t->parms.link != p.link) { - t->parms.link = p.link; - ipip6_tunnel_bind_dev(dev); - } - netdev_state_change(dev); + ipip6_tunnel_update(t, &p); } if (t) { @@ -1197,6 +1223,88 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static void ipip6_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + parms->iph.version = 4; + parms->iph.protocol = IPPROTO_IPV6; + parms->iph.ihl = 5; + parms->iph.ttl = 64; + + if (!data) + return; + + if (data[IFLA_IPTUN_LINK]) + parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); + + if (data[IFLA_IPTUN_LOCAL]) + parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]); + + if (data[IFLA_IPTUN_REMOTE]) + parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]); + + if (data[IFLA_IPTUN_TTL]) { + parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); + if (parms->iph.ttl) + parms->iph.frag_off = htons(IP_DF); + } + + if (data[IFLA_IPTUN_TOS]) + parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); + + if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) + 
parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_IPTUN_FLAGS]) + parms->i_flags = nla_get_u16(data[IFLA_IPTUN_FLAGS]); +} + +static int ipip6_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *nt; + + nt = netdev_priv(dev); + ipip6_netlink_parms(data, &nt->parms); + + if (ipip6_tunnel_locate(net, &nt->parms, 0)) + return -EEXIST; + + return ipip6_tunnel_create(dev); +} + +static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip_tunnel *t; + struct ip_tunnel_parm p; + struct net *net = dev_net(dev); + struct sit_net *sitn = net_generic(net, sit_net_id); + + if (dev == sitn->fb_tunnel_dev) + return -EINVAL; + + ipip6_netlink_parms(data, &p); + + if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || + (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) + return -EINVAL; + + t = ipip6_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else + t = netdev_priv(dev); + + ipip6_tunnel_update(t, &p); + return 0; +} + static size_t ipip6_get_size(const struct net_device *dev) { return @@ -1237,10 +1345,24 @@ nla_put_failure: return -EMSGSIZE; } +static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { + [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, + [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 }, + [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 }, + [IFLA_IPTUN_TTL] = { .type = NLA_U8 }, + [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, + [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, + [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, +}; + static struct rtnl_link_ops sit_link_ops __read_mostly = { .kind = "sit", .maxtype = IFLA_IPTUN_MAX, + .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), + .setup = ipip6_tunnel_setup, + .newlink = ipip6_newlink, + .changelink = ipip6_changelink, .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, }; -- cgit v1.1 From 
c38132865959c47d36b5db1c9289c7391895be6b Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 15 Nov 2012 09:00:18 +0100 Subject: xfrm: Use a static gc threshold value for ipv6 Unlike what ipv4 did, ipv6 does not handle the maximum number of cached routes dynamically. So there is no need to try to handle the IPsec gc threshold value dynamically. This patch sets the IPsec gc threshold value back to 1024 routes, as it is for non-IPsec routes. Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f3ed8ca..6ce4a4f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -327,21 +327,7 @@ static struct ctl_table_header *sysctl_hdr; int __init xfrm6_init(void) { int ret; - unsigned int gc_thresh; - - /* - * We need a good default value for the xfrm6 gc threshold. - * In ipv4 we set it to the route hash table size * 8, which - * is half the size of the maximaum route cache for ipv4. It - * would be good to do the same thing for v6, except the table is - * constructed differently here. Here each table for a net namespace - * can have FIB_TABLE_HASHSZ entries, so lets go with the same - * computation that we used for ipv4 here. Also, lets keep the initial - * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults - * to that as a minimum as well - */ - gc_thresh = FIB6_TABLE_HASHSZ * 8; - xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; + dst_entries_init(&xfrm6_dst_ops); ret = xfrm6_policy_init(); -- cgit v1.1 From d440b72068bb8dff3cc1c2a05b61e60843d82494 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Nov 2012 04:06:41 +0000 Subject: sit: fix sparse warnings This change fixes several sparse warnings about endianness problems. The wrong nla_*() functions were used. It also fixes a sparse warning about a flag test (field i_flags).
This field is used in this file as a local flag only, so it is more of a u16 (gre uses it as a be16). This sparse warning was already there before the patch that added netlink management; the code has just been moved. Reported-by: Fengguang Wu Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7bd2a06..ca6c2c8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -228,7 +228,7 @@ static int ipip6_tunnel_create(struct net_device *dev) goto out; ipip6_tunnel_clone_6rd(dev, sitn); - if (t->parms.i_flags & SIT_ISATAP) + if ((__force u16)t->parms.i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; err = register_netdevice(dev); @@ -1240,10 +1240,10 @@ static void ipip6_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_u32(data[IFLA_IPTUN_LOCAL]); + parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_u32(data[IFLA_IPTUN_REMOTE]); + parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) { parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -1258,7 +1258,7 @@ static void ipip6_netlink_parms(struct nlattr *data[], parms->iph.frag_off = htons(IP_DF); if (data[IFLA_IPTUN_FLAGS]) - parms->i_flags = nla_get_u16(data[IFLA_IPTUN_FLAGS]); + parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); } static int ipip6_newlink(struct net *src_net, struct net_device *dev, @@ -1337,7 +1337,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || - nla_put_u16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) + nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; return 0; -- cgit
v1.1 From 1ff05fb7114a6b4118e0f7d89fed2659f7131b0a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Nov 2012 04:06:42 +0000 Subject: ip6tnl: fix sparse warnings in ip6_tnl_netlink_parms() This change fixes a sparse warning triggered by casting the flowinfo from netlink messages to a u32 instead of a be32. This change corrects that in order to resolve the sparse warning. Reported-by: Fengguang Wu Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ab4d056..bf3a549 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1568,7 +1568,7 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]); if (data[IFLA_IPTUN_FLOWINFO]) - parms->flowinfo = nla_get_u32(data[IFLA_IPTUN_FLOWINFO]); + parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]); if (data[IFLA_IPTUN_FLAGS]) parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]); -- cgit v1.1 From 22061d8014455b01eb018bd6c35a1b3040ccc230 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:11 +0000 Subject: net: Switch to using the new packet offload infrustructure Convert to using the new GSO/GRO registration mechanism and new packet offload structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller --- net/ipv6/af_inet6.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a974247..6e24517 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -938,6 +938,10 @@ out_unlock: static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, +}; + +static struct packet_offload ipv6_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IPV6), .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, .gro_receive = ipv6_gro_receive, @@ -946,6 +950,7 @@ static struct packet_type ipv6_packet_type __read_mostly = { static int __init ipv6_packet_init(void) { + dev_add_offload(&ipv6_packet_offload); dev_add_pack(&ipv6_packet_type); return 0; } @@ -953,6 +958,7 @@ static int __init ipv6_packet_init(void) static void ipv6_packet_cleanup(void) { dev_remove_pack(&ipv6_packet_type); + dev_remove_offload(&ipv6_packet_offload); } static int __net_init ipv6_init_mibs(struct net *net) -- cgit v1.1 From 8ca896cfdd17f32f5aa2747644733ebf3725360d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:13 +0000 Subject: ipv6: Add new offload registration infrastructure. Create a new data structure for IPv6 protocols that holds GRO/GSO callbacks and a new array to track the protocols that register GRO/GSO. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/exthdrs.c | 8 ++++++++ net/ipv6/protocol.c | 21 +++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 7 +++++++ net/ipv6/udp.c | 5 +++++ 4 files changed, 41 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index f005acc..cb6f082 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -531,11 +531,19 @@ static const struct inet6_protocol rthdr_protocol = { .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; +static const struct net_offload rthdr_offload = { + .flags = INET6_PROTO_GSO_EXTHDR, +}; + static const struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; +static const struct net_offload dstopt_offload = { + .flags = INET6_PROTO_GSO_EXTHDR, +}; + static const struct inet6_protocol nodata_protocol = { .handler = dst_discard, .flags = INET6_PROTO_NOPOLICY, diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 053082d..f7c53a7d 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -26,6 +26,7 @@ #include const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; +const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly; int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { @@ -34,6 +35,13 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); +int inet6_add_offload(const struct net_offload *prot, unsigned char protocol) +{ + return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol], + NULL, prot) ? 0 : -1; +} +EXPORT_SYMBOL(inet6_add_offload); + /* * Remove a protocol from the hash tables. 
*/ @@ -50,3 +58,16 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol return ret; } EXPORT_SYMBOL(inet6_del_protocol); + +int inet6_del_offload(const struct net_offload *prot, unsigned char protocol) +{ + int ret; + + ret = (cmpxchg((const struct net_offload **)&inet6_offloads[protocol], + prot, NULL) == prot) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(inet6_del_offload); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c73d0eb..6884a95 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2073,6 +2073,13 @@ static const struct inet6_protocol tcpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +static const struct net_offload tcpv6_offload = { + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, +}; + static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fc99972..3ad44e1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1443,6 +1443,11 @@ static const struct inet6_protocol udpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +static const struct net_offload udpv6_offload = { + .gso_send_check = udp6_ufo_send_check, + .gso_segment = udp6_ufo_fragment, +}; + /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -- cgit v1.1 From 3336288a9feaa809839adbaf05778dc2f16665dc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:15 +0000 Subject: ipv6: Switch to using new offload infrastructure. Switch IPv6 protocol to using the new GRO/GSO calls and data. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/af_inet6.c | 22 +++++++++++----------- net/ipv6/exthdrs.c | 38 +++++++++++++++++++++++++++++++++++--- net/ipv6/tcp_ipv6.c | 17 ++++++++++------- net/ipv6/udp.c | 11 ++++++++--- 4 files changed, 64 insertions(+), 24 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6e24517..eb63dac 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -701,14 +701,14 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted); static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) { - const struct inet6_protocol *ops = NULL; + const struct net_offload *ops = NULL; for (;;) { struct ipv6_opt_hdr *opth; int len; if (proto != NEXTHDR_HOP) { - ops = rcu_dereference(inet6_protos[proto]); + ops = rcu_dereference(inet6_offloads[proto]); if (unlikely(!ops)) break; @@ -736,7 +736,7 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) static int ipv6_gso_send_check(struct sk_buff *skb) { const struct ipv6hdr *ipv6h; - const struct inet6_protocol *ops; + const struct net_offload *ops; int err = -EINVAL; if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) @@ -747,7 +747,7 @@ static int ipv6_gso_send_check(struct sk_buff *skb) err = -EPROTONOSUPPORT; rcu_read_lock(); - ops = rcu_dereference(inet6_protos[ + ops = rcu_dereference(inet6_offloads[ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); if (likely(ops && ops->gso_send_check)) { @@ -765,7 +765,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; - const struct inet6_protocol *ops; + const struct net_offload *ops; int proto; struct frag_hdr *fptr; unsigned int unfrag_ip6hlen; @@ -792,7 +792,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); rcu_read_lock(); - ops = rcu_dereference(inet6_protos[proto]); + ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->gso_segment)) { skb_reset_transport_header(skb); 
segs = ops->gso_segment(skb, features); @@ -825,7 +825,7 @@ out: static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct inet6_protocol *ops; + const struct net_offload *ops; struct sk_buff **pp = NULL; struct sk_buff *p; struct ipv6hdr *iph; @@ -852,7 +852,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, rcu_read_lock(); proto = iph->nexthdr; - ops = rcu_dereference(inet6_protos[proto]); + ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); proto = ipv6_gso_pull_exthdrs(skb, proto); @@ -860,7 +860,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_reset_transport_header(skb); __skb_push(skb, skb_gro_offset(skb)); - ops = rcu_dereference(inet6_protos[proto]); + ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->gro_receive) goto out_unlock; @@ -915,7 +915,7 @@ out: static int ipv6_gro_complete(struct sk_buff *skb) { - const struct inet6_protocol *ops; + const struct net_offload *ops; struct ipv6hdr *iph = ipv6_hdr(skb); int err = -ENOSYS; @@ -923,7 +923,7 @@ static int ipv6_gro_complete(struct sk_buff *skb) sizeof(*iph)); rcu_read_lock(); - ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]); + ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); if (WARN_ON(!ops || !ops->gro_complete)) goto out_unlock; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index cb6f082..de6559e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -549,14 +549,44 @@ static const struct inet6_protocol nodata_protocol = { .flags = INET6_PROTO_NOPOLICY, }; +static int ipv6_exthdrs_offload_init(void) +{ + int ret; + + ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); + if (!ret) + goto out; + + ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); + if (!ret) + goto out_rt; + +out: + return ret; + +out_rt: + inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); + goto out; +} + +static 
void ipv6_exthdrs_offload_exit(void) +{ + inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); + inet_del_offload(&rthdr_offload, IPPROTO_DSTOPTS); +} + int __init ipv6_exthdrs_init(void) { int ret; - ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING); + ret = ipv6_exthdrs_offload_init(); if (ret) goto out; + ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING); + if (ret) + goto out_offload; + ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS); if (ret) goto out_rthdr; @@ -567,10 +597,12 @@ int __init ipv6_exthdrs_init(void) out: return ret; -out_rthdr: - inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); out_destopt: inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); +out_rthdr: + inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); +out_offload: + ipv6_exthdrs_offload_exit(); goto out; }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6884a95..635206e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2066,10 +2066,6 @@ static const struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -2116,10 +2112,14 @@ int __init tcpv6_init(void) { int ret; - ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); + ret = inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); if (ret) goto out; + ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); + if (ret) + goto out_offload; + /* register inet6 protocol */ ret = inet6_register_protosw(&tcpv6_protosw); if (ret) @@ -2131,10 +2131,12 @@ int __init tcpv6_init(void) out: return ret; -out_tcpv6_protocol: - inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); +out_tcpv6_protocol: + inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); +out_offload: + 
inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); goto out; } @@ -2143,4 +2145,5 @@ void tcpv6_exit(void) unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); + inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3ad44e1..e4cc1f4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1438,8 +1438,6 @@ out: static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, - .gso_send_check = udp6_ufo_send_check, - .gso_segment = udp6_ufo_fragment, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -1570,10 +1568,14 @@ int __init udpv6_init(void) { int ret; - ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); + ret = inet6_add_offload(&udpv6_offload, IPPROTO_UDP); if (ret) goto out; + ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); + if (ret) + goto out_offload; + ret = inet6_register_protosw(&udpv6_protosw); if (ret) goto out_udpv6_protocol; @@ -1582,6 +1584,8 @@ out: out_udpv6_protocol: inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); +out_offload: + inet6_del_offload(&udpv6_offload, IPPROTO_UDP); goto out; } @@ -1589,4 +1593,5 @@ void udpv6_exit(void) { inet6_unregister_protosw(&udpv6_protosw); inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); + inet6_del_offload(&udpv6_offload, IPPROTO_UDP); } -- cgit v1.1 From d1da932ed4ecad2a14cbcc01ed589d617d0f0f09 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:16 +0000 Subject: ipv6: Separate ipv6 offload support Separate IPv6 offload functionality into its own file in preparation for the move out of the module Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/Makefile | 3 + net/ipv6/af_inet6.c | 249 +------------------------------------------- net/ipv6/ip6_offload.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_offload.h | 17 +++ 4 files changed, 296 insertions(+), 246 deletions(-) create mode 100644 net/ipv6/ip6_offload.c create mode 100644 net/ipv6/ip6_offload.h (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index b6d3f79..45bd9cd 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,6 +10,8 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o +ipv6-offload := ip6_offload.o + ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o @@ -21,6 +23,7 @@ ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o ipv6-objs += $(ipv6-y) +ipv6-objs += $(ipv6-offload) obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eb63dac..c84d5ba 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,7 @@ #include #include +#include "ip6_offload.h" MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); @@ -699,266 +700,22 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ipv6_opt_accepted); -static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) -{ - const struct net_offload *ops = NULL; - - for (;;) { - struct ipv6_opt_hdr *opth; - int len; - - if (proto != NEXTHDR_HOP) { - ops = rcu_dereference(inet6_offloads[proto]); - - if (unlikely(!ops)) - break; - - if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) - break; - } - - if (unlikely(!pskb_may_pull(skb, 8))) - break; - - opth = (void *)skb->data; - len = ipv6_optlen(opth); - - if (unlikely(!pskb_may_pull(skb, len))) - break; - - proto = opth->nexthdr; - 
__skb_pull(skb, len); - } - - return proto; -} - -static int ipv6_gso_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - const struct net_offload *ops; - int err = -EINVAL; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - err = -EPROTONOSUPPORT; - - rcu_read_lock(); - ops = rcu_dereference(inet6_offloads[ - ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); - - if (likely(ops && ops->gso_send_check)) { - skb_reset_transport_header(skb); - err = ops->gso_send_check(skb); - } - rcu_read_unlock(); - -out: - return err; -} - -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct ipv6hdr *ipv6h; - const struct net_offload *ops; - int proto; - struct frag_hdr *fptr; - unsigned int unfrag_ip6hlen; - u8 *prevhdr; - int offset = 0; - - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - 0))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - segs = ERR_PTR(-EPROTONOSUPPORT); - - proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - rcu_read_lock(); - ops = rcu_dereference(inet6_offloads[proto]); - if (likely(ops && ops->gso_segment)) { - skb_reset_transport_header(skb); - segs = ops->gso_segment(skb, features); - } - rcu_read_unlock(); - - if (IS_ERR(segs)) - goto out; - - for (skb = segs; skb; skb = skb->next) { - ipv6h = ipv6_hdr(skb); - ipv6h->payload_len = htons(skb->len - skb->mac_len - - sizeof(*ipv6h)); - if (proto == IPPROTO_UDP) { - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - fptr = (struct frag_hdr *)(skb_network_header(skb) + - unfrag_ip6hlen); - fptr->frag_off = htons(offset); - if (skb->next != NULL) - fptr->frag_off |= htons(IP6_MF); - offset += 
(ntohs(ipv6h->payload_len) - - sizeof(struct frag_hdr)); - } - } - -out: - return segs; -} - -static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) -{ - const struct net_offload *ops; - struct sk_buff **pp = NULL; - struct sk_buff *p; - struct ipv6hdr *iph; - unsigned int nlen; - unsigned int hlen; - unsigned int off; - int flush = 1; - int proto; - __wsum csum; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*iph); - iph = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - iph = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!iph)) - goto out; - } - - skb_gro_pull(skb, sizeof(*iph)); - skb_set_transport_header(skb, skb_gro_offset(skb)); - - flush += ntohs(iph->payload_len) != skb_gro_len(skb); - - rcu_read_lock(); - proto = iph->nexthdr; - ops = rcu_dereference(inet6_offloads[proto]); - if (!ops || !ops->gro_receive) { - __pskb_pull(skb, skb_gro_offset(skb)); - proto = ipv6_gso_pull_exthdrs(skb, proto); - skb_gro_pull(skb, -skb_transport_offset(skb)); - skb_reset_transport_header(skb); - __skb_push(skb, skb_gro_offset(skb)); - - ops = rcu_dereference(inet6_offloads[proto]); - if (!ops || !ops->gro_receive) - goto out_unlock; - - iph = ipv6_hdr(skb); - } - - NAPI_GRO_CB(skb)->proto = proto; - - flush--; - nlen = skb_network_header_len(skb); - - for (p = *head; p; p = p->next) { - const struct ipv6hdr *iph2; - __be32 first_word; /* */ - - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - iph2 = ipv6_hdr(p); - first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - - /* All fields must match except length and Traffic Class. 
*/ - if (nlen != skb_network_header_len(p) || - (first_word & htonl(0xF00FFFFF)) || - memcmp(&iph->nexthdr, &iph2->nexthdr, - nlen - offsetof(struct ipv6hdr, nexthdr))) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - /* flush if Traffic Class fields are different */ - NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); - NAPI_GRO_CB(p)->flush |= flush; - } - - NAPI_GRO_CB(skb)->flush |= flush; - - csum = skb->csum; - skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); - - pp = ops->gro_receive(head, skb); - - skb->csum = csum; - -out_unlock: - rcu_read_unlock(); - -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} - -static int ipv6_gro_complete(struct sk_buff *skb) -{ - const struct net_offload *ops; - struct ipv6hdr *iph = ipv6_hdr(skb); - int err = -ENOSYS; - - iph->payload_len = htons(skb->len - skb_network_offset(skb) - - sizeof(*iph)); - - rcu_read_lock(); - ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); - if (WARN_ON(!ops || !ops->gro_complete)) - goto out_unlock; - - err = ops->gro_complete(skb); - -out_unlock: - rcu_read_unlock(); - - return err; -} - static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, }; -static struct packet_offload ipv6_packet_offload __read_mostly = { - .type = cpu_to_be16(ETH_P_IPV6), - .gso_send_check = ipv6_gso_send_check, - .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, -}; - static int __init ipv6_packet_init(void) { - dev_add_offload(&ipv6_packet_offload); + ipv6_offload_init(); dev_add_pack(&ipv6_packet_type); return 0; } static void ipv6_packet_cleanup(void) { + ipv6_offload_cleanup(); dev_remove_pack(&ipv6_packet_type); - dev_remove_offload(&ipv6_packet_offload); } static int __net_init ipv6_init_mibs(struct net *net) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c new file mode 100644 index 0000000..01cf983 --- /dev/null +++ b/net/ipv6/ip6_offload.c 
@@ -0,0 +1,273 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include + +#include "ip6_offload.h" + +static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) +{ + const struct net_offload *ops = NULL; + + for (;;) { + struct ipv6_opt_hdr *opth; + int len; + + if (proto != NEXTHDR_HOP) { + ops = rcu_dereference(inet6_offloads[proto]); + + if (unlikely(!ops)) + break; + + if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + + if (unlikely(!pskb_may_pull(skb, 8))) + break; + + opth = (void *)skb->data; + len = ipv6_optlen(opth); + + if (unlikely(!pskb_may_pull(skb, len))) + break; + + proto = opth->nexthdr; + __skb_pull(skb, len); + } + + return proto; +} + +static int ipv6_gso_send_check(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + const struct net_offload *ops; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*ipv6h)); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[ + ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); + + if (likely(ops && ops->gso_send_check)) { + skb_reset_transport_header(skb); + err = ops->gso_send_check(skb); + } + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + const struct net_offload *ops; + int proto; + struct frag_hdr *fptr; + unsigned int unfrag_ip6hlen; + u8 *prevhdr; + int offset = 0; + + if (!(features & NETIF_F_V6_CSUM)) + features &= ~NETIF_F_SG; + + if 
(unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*ipv6h)); + segs = ERR_PTR(-EPROTONOSUPPORT); + + proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[proto]); + if (likely(ops && ops->gso_segment)) { + skb_reset_transport_header(skb); + segs = ops->gso_segment(skb, features); + } + rcu_read_unlock(); + + if (IS_ERR(segs)) + goto out; + + for (skb = segs; skb; skb = skb->next) { + ipv6h = ipv6_hdr(skb); + ipv6h->payload_len = htons(skb->len - skb->mac_len - + sizeof(*ipv6h)); + if (proto == IPPROTO_UDP) { + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + fptr = (struct frag_hdr *)(skb_network_header(skb) + + unfrag_ip6hlen); + fptr->frag_off = htons(offset); + if (skb->next != NULL) + fptr->frag_off |= htons(IP6_MF); + offset += (ntohs(ipv6h->payload_len) - + sizeof(struct frag_hdr)); + } + } + +out: + return segs; +} + +static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + const struct net_offload *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct ipv6hdr *iph; + unsigned int nlen; + unsigned int hlen; + unsigned int off; + int flush = 1; + int proto; + __wsum csum; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*iph); + iph = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + iph = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!iph)) + goto out; + } + + skb_gro_pull(skb, sizeof(*iph)); + skb_set_transport_header(skb, skb_gro_offset(skb)); + + flush += ntohs(iph->payload_len) != skb_gro_len(skb); + + rcu_read_lock(); + proto = iph->nexthdr; + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->gro_receive) { + __pskb_pull(skb, skb_gro_offset(skb)); + proto = ipv6_gso_pull_exthdrs(skb, proto); + 
skb_gro_pull(skb, -skb_transport_offset(skb)); + skb_reset_transport_header(skb); + __skb_push(skb, skb_gro_offset(skb)); + + ops = rcu_dereference(inet6_offloads[proto]); + if (!ops || !ops->gro_receive) + goto out_unlock; + + iph = ipv6_hdr(skb); + } + + NAPI_GRO_CB(skb)->proto = proto; + + flush--; + nlen = skb_network_header_len(skb); + + for (p = *head; p; p = p->next) { + const struct ipv6hdr *iph2; + __be32 first_word; /* */ + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ipv6_hdr(p); + first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; + + /* All fields must match except length and Traffic Class. */ + if (nlen != skb_network_header_len(p) || + (first_word & htonl(0xF00FFFFF)) || + memcmp(&iph->nexthdr, &iph2->nexthdr, + nlen - offsetof(struct ipv6hdr, nexthdr))) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + /* flush if Traffic Class fields are different */ + NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + + csum = skb->csum; + skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + + pp = ops->gro_receive(head, skb); + + skb->csum = csum; + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int ipv6_gro_complete(struct sk_buff *skb) +{ + const struct net_offload *ops; + struct ipv6hdr *iph = ipv6_hdr(skb); + int err = -ENOSYS; + + iph->payload_len = htons(skb->len - skb_network_offset(skb) - + sizeof(*iph)); + + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); + if (WARN_ON(!ops || !ops->gro_complete)) + goto out_unlock; + + err = ops->gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + +static struct packet_offload ipv6_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IPV6), + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, +}; 
+ +void __init ipv6_offload_init(void) +{ + dev_add_offload(&ipv6_packet_offload); +} + +void ipv6_offload_cleanup(void) +{ + dev_remove_offload(&ipv6_packet_offload); +} diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h new file mode 100644 index 0000000..c09614e --- /dev/null +++ b/net/ipv6/ip6_offload.h @@ -0,0 +1,17 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ip6_offload_h +#define __ip6_offload_h + +extern void ipv6_offload_init(void); +extern void ipv6_offload_cleanup(void); + +#endif -- cgit v1.1 From 8663e02aba154e04679c9bb1665af52021d32547 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:17 +0000 Subject: ipv6: Separate tcp offload functionality Pull TCPv6 offload functionality into its own file in preparation for moving it out of the module. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/Makefile | 2 +- net/ipv6/ip6_offload.h | 3 ++ net/ipv6/tcp_ipv6.c | 113 ++--------------------------------------------- net/ipv6/tcpv6_offload.c | 98 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 110 deletions(-) create mode 100644 net/ipv6/tcpv6_offload.c (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 45bd9cd..f47ad9f 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o -ipv6-offload := ip6_offload.o +ipv6-offload := ip6_offload.o tcpv6_offload.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index c09614e..1891946 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -11,6 +11,9 @@ #ifndef __ip6_offload_h #define __ip6_offload_h +int tcpv6_offload_init(void); +void tcpv6_offload_cleanup(void); + extern void ipv6_offload_init(void); extern void ipv6_offload_cleanup(void); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 635206e..5bed594 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -71,15 +71,13 @@ #include #include +#include "ip6_offload.h" static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static void __tcp_v6_send_check(struct sk_buff *skb, - const struct in6_addr *saddr, - const struct in6_addr *daddr); static const struct inet_connection_sock_af_ops ipv6_mapped; static const struct inet_connection_sock_af_ops ipv6_specific; @@ -119,14 +117,6 @@ static void tcp_v6_hash(struct sock *sk) } } -static __inline__ __sum16 tcp_v6_check(int len, - const struct 
in6_addr *saddr, - const struct in6_addr *daddr, - __wsum base) -{ - return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); -} - static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, @@ -722,94 +712,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { }; #endif -static void __tcp_v6_send_check(struct sk_buff *skb, - const struct in6_addr *saddr, const struct in6_addr *daddr) -{ - struct tcphdr *th = tcp_hdr(skb); - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v6_check(skb->len, saddr, daddr, - csum_partial(th, th->doff << 2, - skb->csum)); - } -} - -static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - __tcp_v6_send_check(skb, &np->saddr, &np->daddr); -} - -static int tcp_v6_gso_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - ipv6h = ipv6_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); - return 0; -} - -static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) -{ - const struct ipv6hdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - 
skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -static int tcp6_gro_complete(struct sk_buff *skb) -{ - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), - &iph->saddr, &iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - - return tcp_gro_complete(skb); -} - static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) { @@ -2069,13 +1971,6 @@ static const struct inet6_protocol tcpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static const struct net_offload tcpv6_offload = { - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, -}; - static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, @@ -2112,7 +2007,7 @@ int __init tcpv6_init(void) { int ret; - ret = inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); + ret = tcpv6_offload_init(); if (ret) goto out; @@ -2136,7 +2031,7 @@ out_tcpv6_protosw: out_tcpv6_protocol: inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); out_offload: - inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); + tcpv6_offload_cleanup(); goto out; } @@ -2145,5 +2040,5 @@ void tcpv6_exit(void) unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); - inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); + tcpv6_offload_cleanup(); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c new file mode 100644 index 0000000..edeafed --- /dev/null +++ b/net/ipv6/tcpv6_offload.c @@ -0,0 +1,98 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it 
under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TCPv6 GSO/GRO support + */ +#include +#include +#include +#include +#include "ip6_offload.h" + +static int tcp_v6_gso_send_check(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + ipv6h = ipv6_hdr(skb); + th = tcp_hdr(skb); + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); + return 0; +} + +static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + + case CHECKSUM_NONE: + wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), + IPPROTO_TCP, 0)); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + return tcp_gro_receive(head, skb); +} + +static int tcp6_gro_complete(struct sk_buff *skb) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), + &iph->saddr, &iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + + return tcp_gro_complete(skb); +} + +static const struct net_offload tcpv6_offload = { + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, +}; + +int __init tcpv6_offload_init(void) +{ + return 
inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); +} + +void tcpv6_offload_cleanup(void) +{ + inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); +} -- cgit v1.1 From 5edbb07dc9474b7d4cd4391a2e6551ad067a0f96 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:18 +0000 Subject: ipv6: Separate out UDP offload functionality Pull UDP GSO code into a separate file in preparation for moving the code out of the module. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/Makefile | 2 +- net/ipv6/ip6_offload.h | 3 ++ net/ipv6/udp.c | 104 ++--------------------------------------- net/ipv6/udp_offload.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 130 insertions(+), 101 deletions(-) create mode 100644 net/ipv6/udp_offload.c (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index f47ad9f..04b5c96 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o -ipv6-offload := ip6_offload.o tcpv6_offload.o +ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index 1891946..dff7936 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -11,6 +11,9 @@ #ifndef __ip6_offload_h #define __ip6_offload_h +int udp_offload_init(void); +void udp_offload_cleanup(void); + int tcpv6_offload_init(void); void tcpv6_offload_cleanup(void); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e4cc1f4..013fef7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -50,6 +50,7 @@ #include #include #include "udp_impl.h" +#include "ip6_offload.h" int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { @@ -1343,109 +1344,12 @@ int 
compat_udpv6_getsockopt(struct sock *sk, int level, int optname, } #endif -static int udp6_ufo_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) - return -EINVAL; - - ipv6h = ipv6_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - return 0; -} - -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - unsigned int unfrag_ip6hlen, unfrag_len; - struct frag_hdr *fptr; - u8 *mac_start, *prevhdr; - u8 nexthdr; - u8 frag_hdr_sz = sizeof(struct frag_hdr); - int offset; - __wsum csum; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || - !(type & (SKB_GSO_UDP)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - /* Check if there is enough headroom to insert fragment header. */ - if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && - pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) - goto out; - - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. 
- */ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + - unfrag_ip6hlen; - mac_start = skb_mac_header(skb); - memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); - - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); - - /* Fragment the skb. ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - -out: - return segs; -} - static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static const struct net_offload udpv6_offload = { - .gso_send_check = udp6_ufo_send_check, - .gso_segment = udp6_ufo_fragment, -}; - /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS @@ -1568,7 +1472,7 @@ int __init udpv6_init(void) { int ret; - ret = inet6_add_offload(&udpv6_offload, IPPROTO_UDP); + ret = udp_offload_init(); if (ret) goto out; @@ -1585,7 +1489,7 @@ out: out_udpv6_protocol: inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); out_offload: - inet6_del_offload(&udpv6_offload, IPPROTO_UDP); + udp_offload_cleanup(); goto out; } @@ -1593,5 +1497,5 @@ void udpv6_exit(void) { inet6_unregister_protosw(&udpv6_protosw); inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); - inet6_del_offload(&udpv6_offload, IPPROTO_UDP); + udp_offload_cleanup(); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c new file mode 100644 index 0000000..f964d2b --- /dev/null +++ b/net/ipv6/udp_offload.c @@ -0,0 +1,122 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it 
and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * UDPv6 GSO support + */ +#include +#include +#include +#include +#include "ip6_offload.h" + +static int udp6_ufo_send_check(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + struct udphdr *uh; + + if (!pskb_may_pull(skb, sizeof(*uh))) + return -EINVAL; + + ipv6h = ipv6_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + return 0; +} + +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + unsigned int unfrag_ip6hlen, unfrag_len; + struct frag_hdr *fptr; + u8 *mac_start, *prevhdr; + u8 nexthdr; + u8 frag_hdr_sz = sizeof(struct frag_hdr); + int offset; + __wsum csum; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot + * do checksum of UDP packets sent as multiple IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + /* Check if there is enough headroom to insert fragment header. 
*/ + if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && + pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) + goto out; + + /* Find the unfragmentable header and shift it left by frag_hdr_sz + * bytes to insert fragment header. + */ + unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); + nexthdr = *prevhdr; + *prevhdr = NEXTHDR_FRAGMENT; + unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + + unfrag_ip6hlen; + mac_start = skb_mac_header(skb); + memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); + + skb->mac_header -= frag_hdr_sz; + skb->network_header -= frag_hdr_sz; + + fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); + fptr->nexthdr = nexthdr; + fptr->reserved = 0; + ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + + /* Fragment the skb. ipv6 header and the remaining fields of the + * fragment header are updated in ipv6_gso_segment() + */ + segs = skb_segment(skb, features); + +out: + return segs; +} +static const struct net_offload udpv6_offload = { + .gso_send_check = udp6_ufo_send_check, + .gso_segment = udp6_ufo_fragment, +}; + +int __init udp_offload_init(void) +{ + return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); +} + +void udp_offload_cleanup(void) +{ + inet6_del_offload(&udpv6_offload, IPPROTO_UDP); +} -- cgit v1.1 From 2207afc8bfd80d596b524d4feb6b27f5ce359d59 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:19 +0000 Subject: ipv6: Move exthdr offload support into separate file Move the exthdr offload functionality into a separate file in preparation for moving it out of the module Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/Makefile | 2 +- net/ipv6/exthdrs.c | 40 ++++----------------------------------- net/ipv6/exthdrs_offload.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_offload.h | 3 +++ 4 files changed, 55 insertions(+), 37 deletions(-) create mode 100644 net/ipv6/exthdrs_offload.c (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 04b5c96..7f25077 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o -ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o +ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index de6559e..70fbf6b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -48,6 +48,7 @@ #endif #include +#include "ip6_offload.h" int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) { @@ -528,20 +529,12 @@ unknown_rh: static const struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, - .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, -}; - -static const struct net_offload rthdr_offload = { - .flags = INET6_PROTO_GSO_EXTHDR, + .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, - .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, -}; - -static const struct net_offload dstopt_offload = { - .flags = INET6_PROTO_GSO_EXTHDR, + .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol nodata_protocol = { @@ -549,32 +542,6 @@ static const struct inet6_protocol nodata_protocol = { .flags = INET6_PROTO_NOPOLICY, }; -static int ipv6_exthdrs_offload_init(void) -{ - int ret; - - ret = inet6_add_offload(&rthdr_offload, 
IPPROTO_ROUTING); - if (!ret) - goto out; - - ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) - goto out_rt; - -out: - return ret; - -out_rt: - inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); - goto out; -} - -static void ipv6_exthdrs_offload_exit(void) -{ - inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); - inet_del_offload(&rthdr_offload, IPPROTO_DSTOPTS); -} - int __init ipv6_exthdrs_init(void) { int ret; @@ -608,6 +575,7 @@ out_offload: void ipv6_exthdrs_exit(void) { + ipv6_exthdrs_offload_exit(); inet6_del_protocol(&nodata_protocol, IPPROTO_NONE); inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c new file mode 100644 index 0000000..271bf4a --- /dev/null +++ b/net/ipv6/exthdrs_offload.c @@ -0,0 +1,47 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET6 implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * IPV6 Extension Header GSO/GRO support + */ +#include +#include "ip6_offload.h" + +static const struct net_offload rthdr_offload = { + .flags = INET6_PROTO_GSO_EXTHDR, +}; + +static const struct net_offload dstopt_offload = { + .flags = INET6_PROTO_GSO_EXTHDR, +}; + +int __init ipv6_exthdrs_offload_init(void) +{ + int ret; + + ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); + if (!ret) + goto out; + + ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); + if (!ret) + goto out_rt; + +out: + return ret; + +out_rt: + inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); + goto out; +} + +void ipv6_exthdrs_offload_exit(void) +{ + inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); + inet_del_offload(&rthdr_offload, IPPROTO_DSTOPTS); +} diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index dff7936..4e88ddb 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -11,6 +11,9 @@ #ifndef __ip6_offload_h #define __ip6_offload_h +int ipv6_exthdrs_offload_init(void); +void ipv6_exthdrs_offload_exit(void); + int udp_offload_init(void); void udp_offload_cleanup(void); -- cgit v1.1 From 3c73a0368e995f047c14388a05dcfba599053bef Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:20 +0000 Subject: ipv6: Update ipv6 static library with newly needed functions UDP offload needs some additional functions to be in the static kernel for it to work correctly. Move those functions into the core. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/Makefile | 2 +- net/ipv6/exthdrs.c | 44 ---------------------------- net/ipv6/exthdrs_core.c | 44 ++++++++++++++++++++++++++++ net/ipv6/ip6_output.c | 65 ------------------------------------------ net/ipv6/output_core.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+), 110 deletions(-) create mode 100644 net/ipv6/output_core.c (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 7f25077..cdca302 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -41,6 +41,6 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o +obj-y += addrconf_core.o exthdrs_core.o output_core.o obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 70fbf6b..a786a20 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -50,50 +50,6 @@ #include #include "ip6_offload.h" -int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) -{ - const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - skb->network_header; - struct ipv6_opt_hdr *hdr; - int len; - - if (offset + 2 > packet_len) - goto bad; - hdr = (struct ipv6_opt_hdr *)(nh + offset); - len = ((hdr->hdrlen + 1) << 3); - - if (offset + len > packet_len) - goto bad; - - offset += 2; - len -= 2; - - while (len > 0) { - int opttype = nh[offset]; - int optlen; - - if (opttype == type) - return offset; - - switch (opttype) { - case IPV6_TLV_PAD1: - optlen = 1; - break; - default: - optlen = nh[offset + 1] + 2; - if (optlen > len) - goto bad; - break; - } - offset += optlen; - len -= optlen; - } - /* not_found */ - bad: - return -1; -} -EXPORT_SYMBOL_GPL(ipv6_find_tlv); - /* * Parsing tlv encoded headers. 
* diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index f73d59a..e7d756e 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -111,3 +111,47 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, return start; } EXPORT_SYMBOL(ipv6_skip_exthdr); + +int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) +{ + const unsigned char *nh = skb_network_header(skb); + int packet_len = skb->tail - skb->network_header; + struct ipv6_opt_hdr *hdr; + int len; + + if (offset + 2 > packet_len) + goto bad; + hdr = (struct ipv6_opt_hdr *)(nh + offset); + len = ((hdr->hdrlen + 1) << 3); + + if (offset + len > packet_len) + goto bad; + + offset += 2; + len -= 2; + + while (len > 0) { + int opttype = nh[offset]; + int optlen; + + if (opttype == type) + return offset; + + switch (opttype) { + case IPV6_TLV_PAD1: + optlen = 1; + break; + default: + optlen = nh[offset + 1] + 2; + if (optlen > len) + goto bad; + break; + } + offset += optlen; + len -= optlen; + } + /* not_found */ + bad: + return -1; +} +EXPORT_SYMBOL_GPL(ipv6_find_tlv); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3deaa4e..5552d13 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -544,71 +544,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; - int found_rhdr = 0; - *nexthdr = &ipv6_hdr(skb)->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) { - - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - found_rhdr = 1; - break; - case NEXTHDR_DEST: -#if IS_ENABLED(CONFIG_IPV6_MIP6) - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) - break; -#endif - if (found_rhdr) - return offset; - break; - default : - return 
offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + - offset); - } - - return offset; -} - -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static atomic_t ipv6_fragmentation_id; - int old, new; - - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } - } - do { - old = atomic_read(&ipv6_fragmentation_id); - new = old + 1; - if (!new) - new = 1; - } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - fhdr->identification = htonl(new); -} - int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c new file mode 100644 index 0000000..c2e73e6 --- /dev/null +++ b/net/ipv6/output_core.c @@ -0,0 +1,76 @@ +/* + * IPv6 library code, needed by static components when full IPv6 support is + * not configured or static. These functions are needed by GSO/GRO implementation. 
+ */ +#include +#include +#include + +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static atomic_t ipv6_fragmentation_id; + int old, new; + +#if IS_ENABLED(CONFIG_IPV6) + if (rt && !(rt->dst.flags & DST_NOPEER)) { + struct inet_peer *peer; + struct net *net; + + net = dev_net(rt->dst.dev); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + if (peer) { + fhdr->identification = htonl(inet_getid(peer, 0)); + inet_putpeer(peer); + return; + } + } +#endif + do { + old = atomic_read(&ipv6_fragmentation_id); + new = old + 1; + if (!new) + new = 1; + } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); + fhdr->identification = htonl(new); +} +EXPORT_SYMBOL(ipv6_select_ident); + +int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = + (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); + unsigned int packet_len = skb->tail - skb->network_header; + int found_rhdr = 0; + *nexthdr = &ipv6_hdr(skb)->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + found_rhdr = 1; + break; + case NEXTHDR_DEST: +#if IS_ENABLED(CONFIG_IPV6_MIP6) + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + break; +#endif + if (found_rhdr) + return offset; + break; + default : + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); + } + + return offset; +} +EXPORT_SYMBOL(ip6_find_1stfragopt); -- cgit v1.1 From c6b641a4c6b32f39db678c2441cb1ef824110d74 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:22 +0000 Subject: ipv6: Pull IPv6 GSO registration out of the module Since GSO support is now separate, pull it out of the module and make it its own init call. Remove the cleanup functions as they are no longer called. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/Makefile | 6 +++--- net/ipv6/af_inet6.c | 3 --- net/ipv6/exthdrs.c | 10 +--------- net/ipv6/exthdrs_offload.c | 6 ------ net/ipv6/ip6_offload.c | 17 ++++++++++++----- net/ipv6/ip6_offload.h | 8 -------- net/ipv6/protocol.c | 20 ++++++++++++-------- net/ipv6/tcp_ipv6.c | 10 +--------- net/ipv6/tcpv6_offload.c | 5 ----- net/ipv6/udp.c | 10 +--------- net/ipv6/udp_offload.c | 5 ----- 11 files changed, 30 insertions(+), 70 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index cdca302..04a475d 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o @@ -23,7 +23,6 @@ ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o ipv6-objs += $(ipv6-y) -ipv6-objs += $(ipv6-offload) obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o @@ -41,6 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o output_core.o +obj-y += addrconf_core.o exthdrs_core.o output_core.o protocol.o +obj-y += $(ipv6-offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c84d5ba..7bafc51 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,7 +62,6 @@ #include #include -#include "ip6_offload.h" MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); @@ -707,14 +706,12 @@ static struct packet_type ipv6_packet_type __read_mostly = { static int __init ipv6_packet_init(void) 
{ - ipv6_offload_init(); dev_add_pack(&ipv6_packet_type); return 0; } static void ipv6_packet_cleanup(void) { - ipv6_offload_cleanup(); dev_remove_pack(&ipv6_packet_type); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a786a20..473f628 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -48,7 +48,6 @@ #endif #include -#include "ip6_offload.h" /* * Parsing tlv encoded headers. @@ -502,13 +501,9 @@ int __init ipv6_exthdrs_init(void) { int ret; - ret = ipv6_exthdrs_offload_init(); - if (ret) - goto out; - ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING); if (ret) - goto out_offload; + goto out; ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS); if (ret) @@ -524,14 +519,11 @@ out_destopt: inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); out_rthdr: inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); -out_offload: - ipv6_exthdrs_offload_exit(); goto out; }; void ipv6_exthdrs_exit(void) { - ipv6_exthdrs_offload_exit(); inet6_del_protocol(&nodata_protocol, IPPROTO_NONE); inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index 271bf4a..cf77f3a 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -39,9 +39,3 @@ out_rt: inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); goto out; } - -void ipv6_exthdrs_offload_exit(void) -{ - inet_del_offload(&rthdr_offload, IPPROTO_ROUTING); - inet_del_offload(&rthdr_offload, IPPROTO_DSTOPTS); -} diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 01cf983..63d79d9 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -262,12 +263,18 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { .gro_complete = ipv6_gro_complete, }; -void __init ipv6_offload_init(void) +static int __init ipv6_offload_init(void) { + + if 
(tcpv6_offload_init() < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); + if (udp_offload_init() < 0) + pr_crit("%s: Cannot add UDP protocol offload\n", __func__); + if (ipv6_exthdrs_offload_init() < 0) + pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__); + dev_add_offload(&ipv6_packet_offload); + return 0; } -void ipv6_offload_cleanup(void) -{ - dev_remove_offload(&ipv6_packet_offload); -} +fs_initcall(ipv6_offload_init); diff --git a/net/ipv6/ip6_offload.h b/net/ipv6/ip6_offload.h index 4e88ddb..2e155c6 100644 --- a/net/ipv6/ip6_offload.h +++ b/net/ipv6/ip6_offload.h @@ -12,15 +12,7 @@ #define __ip6_offload_h int ipv6_exthdrs_offload_init(void); -void ipv6_exthdrs_offload_exit(void); - int udp_offload_init(void); -void udp_offload_cleanup(void); - int tcpv6_offload_init(void); -void tcpv6_offload_cleanup(void); - -extern void ipv6_offload_init(void); -extern void ipv6_offload_cleanup(void); #endif diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index f7c53a7d..22d1bd4 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -25,8 +25,9 @@ #include #include +#if IS_ENABLED(CONFIG_IPV6) const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; -const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly; +EXPORT_SYMBOL(inet6_protos); int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { @@ -35,13 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); -int inet6_add_offload(const struct net_offload *prot, unsigned char protocol) -{ - return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol], - NULL, prot) ? 0 : -1; -} -EXPORT_SYMBOL(inet6_add_offload); - /* * Remove a protocol from the hash tables. 
*/ @@ -58,6 +52,16 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol return ret; } EXPORT_SYMBOL(inet6_del_protocol); +#endif + +const struct net_offload __rcu *inet6_offloads[MAX_INET_PROTOS] __read_mostly; + +int inet6_add_offload(const struct net_offload *prot, unsigned char protocol) +{ + return !cmpxchg((const struct net_offload **)&inet6_offloads[protocol], + NULL, prot) ? 0 : -1; +} +EXPORT_SYMBOL(inet6_add_offload); int inet6_del_offload(const struct net_offload *prot, unsigned char protocol) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5bed594..6c0f252 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -71,7 +71,6 @@ #include #include -#include "ip6_offload.h" static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, @@ -2007,13 +2006,9 @@ int __init tcpv6_init(void) { int ret; - ret = tcpv6_offload_init(); - if (ret) - goto out; - ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); if (ret) - goto out_offload; + goto out; /* register inet6 protocol */ ret = inet6_register_protosw(&tcpv6_protosw); @@ -2030,8 +2025,6 @@ out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); out_tcpv6_protocol: inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); -out_offload: - tcpv6_offload_cleanup(); goto out; } @@ -2040,5 +2033,4 @@ void tcpv6_exit(void) unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); - tcpv6_offload_cleanup(); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index edeafed..3a27fe6 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -91,8 +91,3 @@ int __init tcpv6_offload_init(void) { return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); } - -void tcpv6_offload_cleanup(void) -{ - inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); -} diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 
013fef7..dfaa29b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -50,7 +50,6 @@ #include #include #include "udp_impl.h" -#include "ip6_offload.h" int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { @@ -1472,13 +1471,9 @@ int __init udpv6_init(void) { int ret; - ret = udp_offload_init(); - if (ret) - goto out; - ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); if (ret) - goto out_offload; + goto out; ret = inet6_register_protosw(&udpv6_protosw); if (ret) @@ -1488,8 +1483,6 @@ out: out_udpv6_protocol: inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); -out_offload: - udp_offload_cleanup(); goto out; } @@ -1497,5 +1490,4 @@ void udpv6_exit(void) { inet6_unregister_protosw(&udpv6_protosw); inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); - udp_offload_cleanup(); } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index f964d2b..979e4ab 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -115,8 +115,3 @@ int __init udp_offload_init(void) { return inet6_add_offload(&udpv6_offload, IPPROTO_UDP); } - -void udp_offload_cleanup(void) -{ - inet6_del_offload(&udpv6_offload, IPPROTO_UDP); -} -- cgit v1.1 From f191a1d17f227032c159e5499809f545402b6dc6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:23 +0000 Subject: net: Remove code duplication between offload structures Move the offload callbacks into its own structure. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/ipv6/ip6_offload.c | 28 +++++++++++++++------------- net/ipv6/tcpv6_offload.c | 10 ++++++---- net/ipv6/udp_offload.c | 6 ++++-- 3 files changed, 25 insertions(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 63d79d9..f26f0da 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -70,9 +70,9 @@ static int ipv6_gso_send_check(struct sk_buff *skb) ops = rcu_dereference(inet6_offloads[ ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); - if (likely(ops && ops->gso_send_check)) { + if (likely(ops && ops->callbacks.gso_send_check)) { skb_reset_transport_header(skb); - err = ops->gso_send_check(skb); + err = ops->callbacks.gso_send_check(skb); } rcu_read_unlock(); @@ -113,9 +113,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); rcu_read_lock(); ops = rcu_dereference(inet6_offloads[proto]); - if (likely(ops && ops->gso_segment)) { + if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); - segs = ops->gso_segment(skb, features); + segs = ops->callbacks.gso_segment(skb, features); } rcu_read_unlock(); @@ -173,7 +173,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, rcu_read_lock(); proto = iph->nexthdr; ops = rcu_dereference(inet6_offloads[proto]); - if (!ops || !ops->gro_receive) { + if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); @@ -181,7 +181,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, __skb_push(skb, skb_gro_offset(skb)); ops = rcu_dereference(inet6_offloads[proto]); - if (!ops || !ops->gro_receive) + if (!ops || !ops->callbacks.gro_receive) goto out_unlock; iph = ipv6_hdr(skb); @@ -220,7 +220,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, csum = skb->csum; skb_postpull_rcsum(skb, iph, 
skb_network_header_len(skb)); - pp = ops->gro_receive(head, skb); + pp = ops->callbacks.gro_receive(head, skb); skb->csum = csum; @@ -244,10 +244,10 @@ static int ipv6_gro_complete(struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); - if (WARN_ON(!ops || !ops->gro_complete)) + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->gro_complete(skb); + err = ops->callbacks.gro_complete(skb); out_unlock: rcu_read_unlock(); @@ -257,10 +257,12 @@ out_unlock: static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), - .gso_send_check = ipv6_gso_send_check, - .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, + .callbacks = { + .gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, + }, }; static int __init ipv6_offload_init(void) diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 3a27fe6..2ec6bf6 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -81,10 +81,12 @@ static int tcp6_gro_complete(struct sk_buff *skb) } static const struct net_offload tcpv6_offload = { - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, + .callbacks = { + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, + }, }; int __init tcpv6_offload_init(void) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 979e4ab..8e01c44 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -107,8 +107,10 @@ out: return segs; } static const struct net_offload udpv6_offload = { - .gso_send_check = udp6_ufo_send_check, - .gso_segment = udp6_ufo_fragment, + .callbacks = { + .gso_send_check = 
udp6_ufo_send_check, + .gso_segment = udp6_ufo_fragment, + }, }; int __init udp_offload_init(void) -- cgit v1.1 From d4d0d3557b4d6ee735fbec275803d637ee26b42d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 16:35:37 +0000 Subject: ipv6: Fix build error with udp_offload Add ip6_checksum.h include. This should resolve the following issue that shows up on power: net/ipv6/udp_offload.c: In function 'udp6_ufo_send_check': net/ipv6/udp_offload.c:29:2: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/udp_offload.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 8e01c44..0c8934a 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "ip6_offload.h" static int udp6_ufo_send_check(struct sk_buff *skb) -- cgit v1.1 From 0afe21fdf6cfe0fe8a184d82a399773cc331bf40 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 16 Nov 2012 08:07:56 +0100 Subject: xfrm6: Remove commented out function call to xfrm6_input_fini xfrm6_input_fini() has not been in the tree for more than 10 years, so remove the commented out function call. 
Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6ce4a4f..c984413 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -356,7 +356,6 @@ void xfrm6_fini(void) if (sysctl_hdr) unregister_net_sysctl_table(sysctl_hdr); #endif - //xfrm6_input_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); dst_entries_destroy(&xfrm6_dst_ops); -- cgit v1.1 From 75fe83c32248d99e6d5fe64155e519b78bb90481 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 Nov 2012 09:41:21 +0000 Subject: ipv6: Preserve ipv6 functionality needed by NET Some pieces of network use core pieces of IPv6 stack. Keep them available while letting new GSO offload pieces depend on CONFIG_INET. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 04a475d..2068ac4 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o output_core.o protocol.o -obj-y += $(ipv6-offload) +obj-y += addrconf_core.o exthdrs_core.o +obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6_offload) obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o -- cgit v1.1 From 464dc801c76aa0db88e16e8f5f47c6879858b9b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:02:59 +0000 Subject: net: Don't export sysctls to unprivileged users In preparation for supporting the creation of network namespaces by unprivileged users, modify all of the per net sysctl exports and refuse to allow them to unprivileged users. 
This makes it safe for unprivileged users in general to access per net sysctls, and allows sysctls to be exported to unprivileged users on an individual basis as they are deemed safe. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++++ net/ipv6/icmp.c | 7 ++++++- net/ipv6/reassembly.c | 4 ++++ net/ipv6/route.c | 4 ++++ net/ipv6/sysctl_net_ipv6.c | 4 ++++ 5 files changed, 22 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cb803b7..b24b4de 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4735,6 +4735,10 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, t->addrconf_vars[i].extra2 = net; } + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + t->addrconf_vars[0].procname = NULL; + snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4a9fd5..d77dc1e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -967,9 +967,14 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) sizeof(ipv6_icmp_table_template), GFP_KERNEL); - if (table) + if (table) { table[0].data = &net->ipv6.sysctl.icmpv6_time; + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + } + return table; } #endif diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index da8a4e3..e5253ec 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -616,6 +616,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table[0].data = &net->ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; table[2].data = &net->ipv6.frags.timeout; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; } hdr = register_net_sysctl(net, 
"net/ipv6", table); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11249d2..021a48e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2989,6 +2989,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; } return table; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index e85c48b..b06fd07 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -52,6 +52,10 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) goto out; ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + ipv6_table[0].procname = NULL; + ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) goto out_ipv6_table; -- cgit v1.1 From dfc47ef8639facd77210e74be831943c2fdd9c74 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:00 +0000 Subject: net: Push capable(CAP_NET_ADMIN) into the rtnl methods - In rtnetlink_rcv_msg convert the capable(CAP_NET_ADMIN) check to ns_capable(net->user-ns, CAP_NET_ADMIN). Allowing unprivileged users to make netlink calls to modify their local network namespace. - In the rtnetlink doit methods add capable(CAP_NET_ADMIN) so that calls that are not safe for unprivileged users are still protected. Later patches will remove the extra capable calls from methods that are safe for unprivilged users. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 6 ++++++ net/ipv6/addrlabel.c | 3 +++ net/ipv6/route.c | 6 ++++++ 3 files changed, 15 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b24b4de..e21bdb9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3514,6 +3514,9 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct in6_addr *pfx; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3584,6 +3587,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 ifa_flags; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index ff76eec..b106f80 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -425,6 +425,9 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, u32 label; int err = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 021a48e..c6215e2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2446,6 +2446,9 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a struct fib6_config cfg; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2461,6 +2464,9 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a struct fib6_config cfg; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; -- cgit v1.1 From af31f412c7c7a3c0fda4bf4beaf0c85af1f263c8 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Fri, 16 Nov 2012 03:03:06 +0000 Subject: net: Allow userns root to control ipv6 Allow an unprivileged user who has created a user namespace, and then created a network namespace to effectively use the new network namespace, by reducing capable(CAP_NET_ADMIN) and capable(CAP_NET_RAW) calls to be ns_capable(net->user_ns, CAP_NET_ADMIN), or ns_capable(net->user_ns, CAP_NET_RAW) calls. Settings that merely control a single network device are allowed. Either the network device is a logical network device where restrictions make no difference or the network device is a hardware NIC that has been explicitly moved from the initial network namespace. In general policy and network stack state changes are allowed while resource control is left unchanged. Allow the SIOCSIFADDR ioctl to add ipv6 addresses. Allow the SIOCDIFADDR ioctl to delete ipv6 addresses. Allow the SIOCADDRT ioctl to add ipv6 routes. Allow the SIOCDELRT ioctl to delete ipv6 routes. Allow creation of ipv6 raw sockets. Allow setting the IPV6_JOIN_ANYCAST socket option. Allow setting the IPV6_FL_A_RENEW parameter of the IPV6_FLOWLABEL_MGR socket option. Allow setting the IPV6_TRANSPARENT socket option. Allow setting the IPV6_HOPOPTS socket option. Allow setting the IPV6_RTHDRDSTOPTS socket option. Allow setting the IPV6_DSTOPTS socket option. Allow setting the IPV6_IPSEC_POLICY socket option. Allow setting the IPV6_XFRM_POLICY socket option. Allow sending packets with the IPV6_2292HOPOPTS control message. Allow sending packets with the IPV6_2292DSTOPTS control message. Allow sending packets with the IPV6_RTHDRDSTOPTS control message. Allow setting the multicast routing socket options on non multicast routing sockets. Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, and SIOCDELTUNNEL ioctls for setting up, changing and deleting tunnels over ipv6. Allow the SIOCADDTUNNEL, SIOCCHGTUNNEL, SIOCDELTUNNEL ioctls for setting up, changing and deleting ipv6 over ipv4 tunnels. 
Allow the SIOCADDPRL, SIOCDELPRL, SIOCCHGPRL ioctls for adding, deleting, and changing the potential router list for ISATAP tunnels. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- net/ipv6/af_inet6.c | 3 ++- net/ipv6/anycast.c | 2 +- net/ipv6/datagram.c | 6 +++--- net/ipv6/ip6_flowlabel.c | 3 ++- net/ipv6/ip6_gre.c | 4 ++-- net/ipv6/ip6_tunnel.c | 4 ++-- net/ipv6/ip6mr.c | 2 +- net/ipv6/ipv6_sockglue.c | 7 ++++--- net/ipv6/netfilter/ip6_tables.c | 8 ++++---- net/ipv6/route.c | 2 +- net/ipv6/sit.c | 8 ++++---- 12 files changed, 28 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e21bdb9..67ac9f8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2413,7 +2413,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) struct in6_ifreq ireq; int err; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) @@ -2432,7 +2432,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) struct in6_ifreq ireq; int err; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7bafc51..4b29f6b 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -160,7 +160,8 @@ lookup_protocol: } err = -EPERM; - if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + if (sock->type == SOCK_RAW && !kern && + !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4963c76..2f4f584 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -64,7 +64,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; - if 
(!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 93cbad2..8edf260 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -701,7 +701,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk, err = -EINVAL; goto exit_f; } - if (!capable(CAP_NET_RAW)) { + if (!ns_capable(net->user_ns, CAP_NET_RAW)) { err = -EPERM; goto exit_f; } @@ -721,7 +721,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk, err = -EINVAL; goto exit_f; } - if (!capable(CAP_NET_RAW)) { + if (!ns_capable(net->user_ns, CAP_NET_RAW)) { err = -EPERM; goto exit_f; } @@ -746,7 +746,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk, err = -EINVAL; goto exit_f; } - if (!capable(CAP_NET_RAW)) { + if (!ns_capable(net->user_ns, CAP_NET_RAW)) { err = -EPERM; goto exit_f; } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 90bbefb..29124b7 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -519,7 +519,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } read_unlock_bh(&ip6_sk_fl_lock); - if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) { + if (freq.flr_share == IPV6_FL_S_NONE && + ns_capable(net->user_ns, CAP_NET_ADMIN)) { fl = fl_lookup(net, freq.flr_label); if (fl) { err = fl6_renew(fl, freq.flr_linger, freq.flr_expires); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 823fd64..867466c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1146,7 +1146,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, case SIOCADDTUNNEL: case SIOCCHGTUNNEL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; err = -EFAULT; @@ -1194,7 +1194,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, case SIOCDELTUNNEL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if 
(!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; if (dev == ign->fb_tunnel_dev) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index bf3a549..fb828e9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1350,7 +1350,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCADDTUNNEL: case SIOCCHGTUNNEL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) @@ -1383,7 +1383,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCDELTUNNEL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; if (dev == ip6n->fb_tnl_dev) { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f7c7c63..d7330f8 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1583,7 +1583,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return -ENOENT; if (optname != MRT6_INIT) { - if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN)) + if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EACCES; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4b4172d..ee94d31 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -343,7 +343,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; case IPV6_TRANSPARENT: - if (valbool && !capable(CAP_NET_ADMIN) && !capable(CAP_NET_RAW)) { + if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) && + !ns_capable(net->user_ns, CAP_NET_RAW)) { retv = -EPERM; break; } @@ -381,7 +382,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, /* hop-by-hop / destination options are privileged option */ retv = -EPERM; - if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) + if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; opt = 
ipv6_renew_options(sk, np->opt, optname, @@ -754,7 +755,7 @@ done: case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: retv = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; retv = xfrm_user_policy(sk, optname, optval, optlen); break; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 10ce76a..74cadd0 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1854,7 +1854,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, { int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { @@ -1969,7 +1969,7 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { @@ -1991,7 +1991,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { @@ -2016,7 +2016,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c6215e2..a86b655 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2036,7 +2036,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch(cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = copy_from_user(&rtmsg, arg, sizeof(struct in6_rtmsg)); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ca6c2c8..fee21c6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -988,7 +988,7 @@ ipip6_tunnel_ioctl (struct net_device 
*dev, struct ifreq *ifr, int cmd) case SIOCADDTUNNEL: case SIOCCHGTUNNEL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; err = -EFAULT; @@ -1032,7 +1032,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCDELTUNNEL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; if (dev == sitn->fb_tunnel_dev) { @@ -1065,7 +1065,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCDELPRL: case SIOCCHGPRL: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; err = -EINVAL; if (dev == sitn->fb_tunnel_dev) @@ -1094,7 +1094,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) case SIOCCHG6RD: case SIOCDEL6RD: err = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto done; err = -EFAULT; -- cgit v1.1 From c027aab4a6b1fe2541090ac04bee8ad246aeef70 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Fri, 16 Nov 2012 03:03:10 +0000 Subject: net: Enable some sysctls that are safe for the userns root - Enable the per device ipv4 sysctls: net/ipv4/conf/<interface>/forwarding net/ipv4/conf/<interface>/mc_forwarding net/ipv4/conf/<interface>/accept_redirects net/ipv4/conf/<interface>/secure_redirects net/ipv4/conf/<interface>/shared_media net/ipv4/conf/<interface>/rp_filter net/ipv4/conf/<interface>/send_redirects net/ipv4/conf/<interface>/accept_source_route net/ipv4/conf/<interface>/accept_local net/ipv4/conf/<interface>/src_valid_mark net/ipv4/conf/<interface>/proxy_arp net/ipv4/conf/<interface>/medium_id net/ipv4/conf/<interface>/bootp_relay net/ipv4/conf/<interface>/log_martians net/ipv4/conf/<interface>/tag net/ipv4/conf/<interface>/arp_filter net/ipv4/conf/<interface>/arp_announce net/ipv4/conf/<interface>/arp_ignore net/ipv4/conf/<interface>/arp_accept net/ipv4/conf/<interface>/arp_notify net/ipv4/conf/<interface>/proxy_arp_pvlan net/ipv4/conf/<interface>/disable_xfrm net/ipv4/conf/<interface>/disable_policy net/ipv4/conf/<interface>/force_igmp_version net/ipv4/conf/<interface>/promote_secondaries net/ipv4/conf/<interface>/route_localnet - Enable the global ipv4 sysctl: net/ipv4/ip_forward - Enable the per device ipv6 sysctls: net/ipv6/conf/<interface>/forwarding net/ipv6/conf/<interface>/hop_limit net/ipv6/conf/<interface>/mtu net/ipv6/conf/<interface>/accept_ra net/ipv6/conf/<interface>/accept_redirects net/ipv6/conf/<interface>/autoconf net/ipv6/conf/<interface>/dad_transmits net/ipv6/conf/<interface>/router_solicitations net/ipv6/conf/<interface>/router_solicitation_interval net/ipv6/conf/<interface>/router_solicitation_delay net/ipv6/conf/<interface>/force_mld_version net/ipv6/conf/<interface>/use_tempaddr net/ipv6/conf/<interface>/temp_valid_lft net/ipv6/conf/<interface>/temp_prefered_lft net/ipv6/conf/<interface>/regen_max_retry net/ipv6/conf/<interface>/max_desync_factor net/ipv6/conf/<interface>/max_addresses net/ipv6/conf/<interface>/accept_ra_defrtr net/ipv6/conf/<interface>/accept_ra_pinfo net/ipv6/conf/<interface>/accept_ra_rtr_pref net/ipv6/conf/<interface>/router_probe_interval net/ipv6/conf/<interface>/accept_ra_rt_info_max_plen net/ipv6/conf/<interface>/proxy_ndp net/ipv6/conf/<interface>/accept_source_route net/ipv6/conf/<interface>/optimistic_dad net/ipv6/conf/<interface>/mc_forwarding net/ipv6/conf/<interface>/disable_ipv6 net/ipv6/conf/<interface>/accept_dad net/ipv6/conf/<interface>/force_tllao - Enable the global ipv6 sysctls: net/ipv6/bindv6only net/ipv6/icmp/ratelimit Signed-off-by: "Eric W.
Biederman" Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ---- net/ipv6/icmp.c | 7 +------ net/ipv6/sysctl_net_ipv6.c | 4 ---- 3 files changed, 1 insertion(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 67ac9f8..d39fe49 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4741,10 +4741,6 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, t->addrconf_vars[i].extra2 = net; } - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - t->addrconf_vars[0].procname = NULL; - snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d77dc1e..b4a9fd5 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -967,14 +967,9 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) sizeof(ipv6_icmp_table_template), GFP_KERNEL); - if (table) { + if (table) table[0].data = &net->ipv6.sysctl.icmpv6_time; - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; - } - return table; } #endif diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index b06fd07..e85c48b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -52,10 +52,6 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) goto out; ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - ipv6_table[0].procname = NULL; - ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) goto out_ipv6_table; -- cgit v1.1 From b51642f6d77b131dc85d1d71029c3cbb5b07c262 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Fri, 16 Nov 2012 03:03:11 +0000 Subject: net: Enable a userns root rtnl calls that are safe for unprivilged users - Only allow moving network devices to network namespaces you have CAP_NET_ADMIN privileges over. - Enable creating/deleting/modifying interfaces - Enable adding/deleting addresses - Enable adding/setting/deleting neighbour entries - Enable adding/removing routes - Enable adding/removing fib rules - Enable setting the forwarding state - Enable adding/removing ipv6 address labels - Enable setting bridge parameter Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ------ net/ipv6/addrlabel.c | 3 --- net/ipv6/route.c | 6 ------ 3 files changed, 15 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d39fe49..fc0e13a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3514,9 +3514,6 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct in6_addr *pfx; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3587,9 +3584,6 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 ifa_flags; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b106f80..ff76eec 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -425,9 +425,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, u32 label; int err = 0; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); if (err < 0) return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a86b655..8f124f5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2446,9 +2446,6 @@ static int 
inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a struct fib6_config cfg; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2464,9 +2461,6 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a struct fib6_config cfg; int err; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; -- cgit v1.1 From 3594698a1fb8e5ae60a92c72ce9ca280256939a7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 16 Nov 2012 03:03:12 +0000 Subject: net: Make CAP_NET_BIND_SERVICE per user namespace Allow privileged users in any user namespace to bind to privileged sockets in network namespaces they control. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4b29f6b..b043c60 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -283,7 +283,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; snum = ntohs(addr->sin6_port); - if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; lock_sock(sk); -- cgit v1.1 From e2f1f072db8db81e6b5bcbfcf409bb5c91dc9329 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 19 Nov 2012 22:41:45 +0000 Subject: sit: allow to configure 6rd tunnels via netlink This patch add the support of 6RD tunnels management via netlink. Note that netdev_state_change() is now called when 6RD parameters are updated. 6RD parameters are updated only if there is at least one 6RD attribute. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 124 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fee21c6..80cb382 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -936,6 +936,38 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) netdev_state_change(t->dev); } +#ifdef CONFIG_IPV6_SIT_6RD +static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, + struct ip_tunnel_6rd *ip6rd) +{ + struct in6_addr prefix; + __be32 relay_prefix; + + if (ip6rd->relay_prefixlen > 32 || + ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64) + return -EINVAL; + + ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen); + if (!ipv6_addr_equal(&prefix, &ip6rd->prefix)) + return -EINVAL; + if (ip6rd->relay_prefixlen) + relay_prefix = ip6rd->relay_prefix & + htonl(0xffffffffUL << + (32 - ip6rd->relay_prefixlen)); + else + relay_prefix = 0; + if (relay_prefix != ip6rd->relay_prefix) + return -EINVAL; + + t->ip6rd.prefix = prefix; + t->ip6rd.relay_prefix = relay_prefix; + t->ip6rd.prefixlen = ip6rd->prefixlen; + t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; + netdev_state_change(t->dev); + return 0; +} +#endif + static int ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -1105,31 +1137,9 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = netdev_priv(dev); if (cmd != SIOCDEL6RD) { - struct in6_addr prefix; - __be32 relay_prefix; - - err = -EINVAL; - if (ip6rd.relay_prefixlen > 32 || - ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) - goto done; - - ipv6_addr_prefix(&prefix, &ip6rd.prefix, - ip6rd.prefixlen); - if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) goto done; - if (ip6rd.relay_prefixlen) - relay_prefix = ip6rd.relay_prefix & - htonl(0xffffffffUL << - (32 - ip6rd.relay_prefixlen)); - else - relay_prefix = 0; - if 
(relay_prefix != ip6rd.relay_prefix) - goto done; - - t->ip6rd.prefix = prefix; - t->ip6rd.relay_prefix = relay_prefix; - t->ip6rd.prefixlen = ip6rd.prefixlen; - t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; } else ipip6_tunnel_clone_6rd(dev, sitn); @@ -1261,11 +1271,53 @@ static void ipip6_netlink_parms(struct nlattr *data[], parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); } +#ifdef CONFIG_IPV6_SIT_6RD +/* This function returns true when 6RD attributes are present in the nl msg */ +static bool ipip6_netlink_6rd_parms(struct nlattr *data[], + struct ip_tunnel_6rd *ip6rd) +{ + bool ret = false; + memset(ip6rd, 0, sizeof(*ip6rd)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_6RD_PREFIX]) { + ret = true; + nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX], + sizeof(struct in6_addr)); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { + ret = true; + ip6rd->relay_prefix = + nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]); + } + + if (data[IFLA_IPTUN_6RD_PREFIXLEN]) { + ret = true; + ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) { + ret = true; + ip6rd->relay_prefixlen = + nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); + } + + return ret; +} +#endif + static int ipip6_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); struct ip_tunnel *nt; +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif + int err; nt = netdev_priv(dev); ipip6_netlink_parms(data, &nt->parms); @@ -1273,7 +1325,16 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, if (ipip6_tunnel_locate(net, &nt->parms, 0)) return -EEXIST; - return ipip6_tunnel_create(dev); + err = ipip6_tunnel_create(dev); + if (err < 0) + return err; + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + err = ipip6_tunnel_update_6rd(nt, &ip6rd); +#endif + + return err; } static int 
ipip6_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1283,6 +1344,9 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm p; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif if (dev == sitn->fb_tunnel_dev) return -EINVAL; @@ -1302,6 +1366,12 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], t = netdev_priv(dev); ipip6_tunnel_update(t, &p); + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + return ipip6_tunnel_update_6rd(t, &ip6rd); +#endif + return 0; } @@ -1322,6 +1392,16 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + +#ifdef CONFIG_IPV6_SIT_6RD + /* IFLA_IPTUN_6RD_PREFIX */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_6RD_RELAY_PREFIX */ + nla_total_size(4) + + /* IFLA_IPTUN_6RD_PREFIXLEN */ + nla_total_size(2) + + /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */ + nla_total_size(2) + +#endif 0; } @@ -1339,6 +1419,19 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; + +#ifdef CONFIG_IPV6_SIT_6RD + if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), + &tunnel->ip6rd.prefix) || + nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || + nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, + tunnel->ip6rd.prefixlen) || + nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + tunnel->ip6rd.relay_prefixlen)) + goto nla_put_failure; +#endif + return 0; nla_put_failure: @@ -1353,6 +1446,12 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, +#ifdef CONFIG_IPV6_SIT_6RD + 
[IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, + [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 }, + [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 }, +#endif }; static struct rtnl_link_ops sit_link_ops __read_mostly = { -- cgit v1.1 From 2b9164771efe191c4ef266ae53c8c05ab92dd115 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 22 Nov 2012 01:13:58 +0000 Subject: ipv6: adapt connect for repair move This is work the same as for ipv4. All other hacks about tcp repair are in common code for ipv4 and ipv6, so this patch is enough for repairing ipv6 connections. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pavel Emelyanov Signed-off-by: Andrey Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6c0f252..6565cf5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -295,7 +295,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) goto late_failure; - if (!tp->write_seq) + if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, inet->inet_sport, -- cgit v1.1 From 53d6841d225b93c20d561878637c3cd307c11648 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 25 Nov 2012 09:35:30 +0000 Subject: ipv4/ipmr and ipv6/ip6mr: Convert int mroute_do_ to bool Save a few bytes per table by convert mroute_do_assert and mroute_do_pim from int to bool. Remove !! as the compiler does that when assigning int to bool. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d7330f8..79bb490 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -66,8 +66,8 @@ struct mr6_table { struct mif_device vif6_table[MAXMIFS]; int maxvif; atomic_t cache_resolve_queue_len; - int mroute_do_assert; - int mroute_do_pim; + bool mroute_do_assert; + bool mroute_do_pim; #ifdef CONFIG_IPV6_PIMSM_V2 int mroute_reg_vif_num; #endif @@ -1648,7 +1648,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns int v; if (get_user(v, (int __user *)optval)) return -EFAULT; - mrt->mroute_do_assert = !!v; + mrt->mroute_do_assert = v; return 0; } -- cgit v1.1 From 03f52a0a554210d5049eeed9f1bb29047dc807cb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 25 Nov 2012 18:26:34 +0000 Subject: ip6mr: Add sizeof verification to MRT6_ASSERT and MT6_PIM Verify the length of the user-space arguments. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 79bb490..926ea54 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1646,6 +1646,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_ASSERT: { int v; + + if (optlen != sizeof(v)) + return -EINVAL; if (get_user(v, (int __user *)optval)) return -EFAULT; mrt->mroute_do_assert = v; @@ -1656,6 +1659,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_PIM: { int v; + + if (optlen != sizeof(v)) + return -EINVAL; if (get_user(v, (int __user *)optval)) return -EFAULT; v = !!v; -- cgit v1.1 From f4e0b4c5e1c3eac9b7376ce73fb63de436057db1 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 27 Nov 2012 03:07:11 +0000 Subject: ip6tnl/sit: drop packet if ECN present with not-ECT This patch reports the change made by Stephen Hemminger in ipip and gre[6] in commit eccc1bb8d4b4 (tunnel: drop packet if ECN present with not-ECT). Goal is to handle RFC6040, Section 4.2: Default Tunnel Egress Behaviour. o If the inner ECN field is Not-ECT, the decapsulator MUST NOT propagate any other ECN codepoint onwards. This is because the inner Not-ECT marking is set by transports that rely on dropped packets as an indication of congestion and would not understand or respond to any other ECN codepoint [RFC4774]. Specifically: * If the inner ECN field is Not-ECT and the outer ECN field is CE, the decapsulator MUST drop the packet. * If the inner ECN field is Not-ECT and the outer ECN field is Not-ECT, ECT(0), or ECT(1), the decapsulator MUST forward the outgoing packet with the ECN field cleared to Not-ECT. The patch takes benefits from common function added in net/inet_ecn.h. 
Like it was done for Xin4 tunnels, it adds logging to allow detecting broken systems that set ECN bits incorrectly when tunneling (or an intermediate router might be changing the header). Errors are also tracked via rx_frame_error. CC: Stephen Hemminger Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 49 ++++++++++++++++++++++++++++++++----------------- net/ipv6/sit.c | 33 +++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 29 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fb828e9..a14f28b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -74,6 +74,10 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); @@ -683,28 +687,26 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } -static void ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, - struct sk_buff *skb) +static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb) { __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); - if (INET_ECN_is_ce(dsfield)) - IP_ECN_set_ce(ip_hdr(skb)); + return IP6_ECN_decapsulate(ipv6h, skb); } -static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, - struct sk_buff *skb) +static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb) { if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) 
ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); - if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) - IP6_ECN_set_ce(ipv6_hdr(skb)); + return IP6_ECN_decapsulate(ipv6h, skb); } __u32 ip6_tnl_get_cap(struct ip6_tnl *t, @@ -768,12 +770,13 @@ EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, __u8 ipproto, - void (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, - struct sk_buff *skb)) + int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb)) { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int err; rcu_read_lock(); @@ -803,14 +806,26 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, skb->pkt_type = PACKET_HOST; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + __skb_tunnel_rx(skb, t->dev); + + err = dscp_ecn_decapsulate(t, ipv6h, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", + &ipv6h->saddr, + ipv6_get_dsfield(ipv6h)); + if (err > 1) { + ++t->dev->stats.rx_frame_errors; + ++t->dev->stats.rx_errors; + rcu_read_unlock(); + goto discard; + } + } + tstats = this_cpu_ptr(t->dev->tstats); tstats->rx_packets++; tstats->rx_bytes += skb->len; - __skb_tunnel_rx(skb, t->dev); - - dscp_ecn_decapsulate(t, ipv6h, skb); - netif_rx(skb); rcu_read_unlock(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 80cb382..cfba99b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -65,6 +65,10 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); @@ -106,6 +110,7 @@ static struct rtnl_link_stats64 
*ipip6_get_stats64(struct net_device *dev, } tot->rx_errors = dev->stats.rx_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; tot->tx_fifo_errors = dev->stats.tx_fifo_errors; tot->tx_carrier_errors = dev->stats.tx_carrier_errors; tot->tx_dropped = dev->stats.tx_dropped; @@ -585,16 +590,11 @@ out: return err; } -static inline void ipip6_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) -{ - if (INET_ECN_is_ce(iph->tos)) - IP6_ECN_set_ce(ipv6_hdr(skb)); -} - static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph; struct ip_tunnel *tunnel; + int err; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -616,18 +616,27 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; - kfree_skb(skb); - return 0; + goto out; + } + + __skb_tunnel_rx(skb, tunnel->dev); + + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto out; + } } tstats = this_cpu_ptr(tunnel->dev->tstats); tstats->rx_packets++; tstats->rx_bytes += skb->len; - __skb_tunnel_rx(skb, tunnel->dev); - - ipip6_ecn_decapsulate(iph, skb); - netif_rx(skb); return 0; -- cgit v1.1 From ce43b03e8889475817d427b1f3724c7e294b76eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 30 Nov 2012 09:49:27 +0000 Subject: net: move inet_dport/inet_num in sock_common commit 68835aba4d9b (net: optimize INET input path further) moved some fields used for tcp/udp sockets lookup in the first cache line of struct sock_common. This patch moves inet_dport/inet_num as well, filling a 32bit hole on 64 bit arches and reducing number of cache line misses in lookups. 
Also change INET_MATCH()/INET_TW_MATCH() to perform the ports match before addresses match, as this check is more discriminant. Remove the hash check from MATCH() macros because we dont need to re validate the hash value after taking a refcount on socket, and use likely/unlikely compiler hints, as the sk_hash/hash check makes the following conditional tests 100% predicted by cpu. Introduce skc_addrpair/skc_portpair pair values to better document the alignment requirements of the port/addr pairs used in the various MATCH() macros, and remove some casts. The namespace check can also be done at last. This slightly improves TCP/UDP lookup times. IP/TCP early demux needs inet->rx_dst_ifindex and TCP needs inet->min_ttl, lets group them together in same cache line. With help from Ben Hutchings & Joe Perches. Idea of this patch came after Ling Ma proposal to move skc_hash to the beginning of struct sock_common, and should allow him to submit a final version of his patch. My tests show an improvement doing so. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Joe Perches Cc: Ling Ma Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 73f1a00..dea17fd 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -87,11 +87,13 @@ struct sock *__inet6_lookup_established(struct net *net, rcu_read_lock(); begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { - /* For IPV6 do the cheaper port and family tests first. 
*/ - if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (sk->sk_hash != hash) + continue; + if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) goto begintw; - if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, + ports, dif))) { sock_put(sk); goto begin; } @@ -104,12 +106,16 @@ begin: begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_nulls_for_each_rcu(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (sk->sk_hash != hash) + continue; + if (likely(INET6_TW_MATCH(sk, net, saddr, daddr, + ports, dif))) { if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { sk = NULL; goto out; } - if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, + ports, dif))) { sock_put(sk); goto begintw; } @@ -236,9 +242,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, /* Check TIME-WAIT sockets first. */ sk_nulls_for_each(sk2, node, &head->twchain) { - tw = inet_twsk(sk2); + if (sk2->sk_hash != hash) + continue; - if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { + if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, + ports, dif))) { + tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) goto unique; else @@ -249,7 +258,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, /* And established part... 
*/ sk_nulls_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) + if (sk2->sk_hash != hash) + continue; + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) goto not_unique; } -- cgit v1.1 From aeaf6e9d2f49d793d3eb8c1af4095cf25e061b94 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Fri, 30 Nov 2012 10:25:59 +0000 Subject: ipv6: unify logic evaluating inet6_dev's accept_ra property As of 026359b [ipv6: Send ICMPv6 RSes only when RAs are accepted], the logic determining whether to send Router Solicitations is identical to the logic determining whether kernel accepts Router Advertisements. However the condition itself is repeated in several code locations. Unify it by introducing 'ipv6_accept_ra()' accessor. Also, simplify the condition expression, making it more readable. No semantic change. Signed-off-by: Shmulik Ladkani Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +-- net/ipv6/ndisc.c | 16 ++-------------- 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fc0e13a..4b644f6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3150,8 +3150,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) router advertisements, start sending router solicitations. */ - if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) || - ifp->idev->cnf.accept_ra == 2) && + if (ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0 && (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f41853b..cf43b65 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1032,18 +1032,6 @@ errout: rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); } -static inline int accept_ra(struct inet6_dev *in6_dev) -{ - /* - * If forwarding is enabled, RA are not accepted unless the special - * hybrid mode (accept_ra=2) is enabled. 
- */ - if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2) - return 0; - - return in6_dev->cnf.accept_ra; -} - static void ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); @@ -1091,7 +1079,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - if (!accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) goto skip_linkparms; #ifdef CONFIG_IPV6_NDISC_NODETYPE @@ -1247,7 +1235,7 @@ skip_linkparms: NEIGH_UPDATE_F_ISROUTER); } - if (!accept_ra(in6_dev)) + if (!ipv6_accept_ra(in6_dev)) goto out; #ifdef CONFIG_IPV6_ROUTE_INFO -- cgit v1.1 From a0ecb85a2c3af73c63b6d44ce82aea52347ccf55 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 30 Nov 2012 12:37:26 +0000 Subject: netfilter: nf_nat: Handle routing changes in MASQUERADE target When the route changes (backup default route, VPNs) which affect a masqueraded target, the packets were sent out with the outdated source address. The patch addresses the issue by comparing the outgoing interface directly with the masqueraded interface in the nat table. Events are inefficient in this case, because it'd require adding route events to the network core and then scanning the whole conntrack table and re-checking the route for all entry. 
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6table_nat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index fa84cf8..6c8ae24 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -137,6 +137,10 @@ nf_nat_ipv6_fn(unsigned int hooknum, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; + } } return nf_nat_packet(ct, ctinfo, hooknum, skb); -- cgit v1.1 From 9ba2add3cf5c103b7236f82a023c8ee05a51e4d1 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Sun, 2 Dec 2012 01:44:53 +0000 Subject: ipv6: Make 'addrconf_rs_timer' send Router Solicitations (and re-arm itself) if Router Advertisements are accepted As of 026359b [ipv6: Send ICMPv6 RSes only when RAs are accepted], Router Solicitations are sent whenever kernel accepts Router Advertisements on the interface. However, this logic isn't reflected in 'addrconf_rs_timer'. The timer fails to issue subsequent RS messages (and fails to re-arm itself) if forwarding is enabled and the special hybrid mode is enabled (accept_ra=2). Fix the condition determining whether next RS should be sent, by using 'ipv6_accept_ra()'. Reported-by: Ami Koren Signed-off-by: Shmulik Ladkani Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4b644f6..22ae75d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2988,7 +2988,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->dead || !(idev->if_flags & IF_READY)) goto out; - if (idev->cnf.forwarding) + if (!ipv6_accept_ra(idev)) goto out; /* Announcement received after solicitation was sent */ -- cgit v1.1 From a5a81f0b9025867efb999d14a8dfc1907c5a4c3b Mon Sep 17 00:00:00 2001 From: Paul Marks Date: Mon, 3 Dec 2012 10:26:54 +0000 Subject: ipv6: Fix default route failover when CONFIG_IPV6_ROUTER_PREF=n I believe this commit from 2008 was incorrect: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=398bcbebb6f721ac308df1e3d658c0029bb74503 When CONFIG_IPV6_ROUTER_PREF is disabled, the kernel should follow RFC4861 section 6.3.6: if no route is NUD_VALID, then traffic should be sprayed across all routers (indirectly triggering NUD) until one of them becomes NUD_VALID. However, the following experiment demonstrates that this does not work: 1) Connect to an IPv6 network. 2) Change the router's MAC (and link-local) address. The kernel will lock onto the first router and never try the new one, even if the first becomes unreachable. This patch fixes the problem by allowing rt6_check_neigh() to return 0; if all routers return 0, then rt6_select() will fall back to round-robin behavior. This patch should have no effect when CONFIG_IPV6_ROUTER_PREF=y. Note that rt6_check_neigh() is only used in a boolean context, so I've changed its return type accordingly. Signed-off-by: Paul Marks Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8f124f5..e229a3b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -544,35 +544,32 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline int rt6_check_neigh(struct rt6_info *rt) +static inline bool rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - int m; + bool ret = false; neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - m = 1; + ret = true; else if (neigh) { read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) - m = 2; + ret = true; #ifdef CONFIG_IPV6_ROUTER_PREF - else if (neigh->nud_state & NUD_FAILED) - m = 0; + else if (!(neigh->nud_state & NUD_FAILED)) + ret = true; #endif - else - m = 1; read_unlock_bh(&neigh->lock); - } else - m = 0; - return m; + } + return ret; } static int rt6_score_route(struct rt6_info *rt, int oif, int strict) { - int m, n; + int m; m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) @@ -580,8 +577,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - n = rt6_check_neigh(rt); - if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) + if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) return -1; return m; } -- cgit v1.1 From d67b8c616b48df30e2836d797795f2420d109bc9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:35 +0000 Subject: netconf: advertise mc_forwarding status This patch advertise the MC_FORWARDING status for IPv4 and IPv6. This field is readonly, only multicast engine in the kernel updates it. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 10 ++++++++-- net/ipv6/ip6mr.c | 20 ++++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 22ae75d..28e0e62 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -469,6 +469,8 @@ static int inet6_netconf_msgsize_devconf(int type) /* type -1 is used for ALL */ if (type == -1 || type == NETCONFA_FORWARDING) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_MC_FORWARDING) + size += nla_total_size(4); return size; } @@ -496,6 +498,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((type == -1 || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_MC_FORWARDING) && + nla_put_s32(skb, NETCONFA_MC_FORWARDING, + devconf->mc_forwarding) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -504,8 +510,8 @@ nla_put_failure: return -EMSGSIZE; } -static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf) +void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 926ea54..1c05fe6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -52,6 +52,7 @@ #include #include #include +#include struct mr6_table { struct list_head list; @@ -805,8 +806,12 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) dev_set_allmulti(dev, -1); in6_dev = __in6_dev_get(dev); - if (in6_dev) + if (in6_dev) { in6_dev->cnf.mc_forwarding--; + inet6_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in6_dev->cnf); + } if (v->flags & MIFF_REGISTER) unregister_netdevice_queue(dev, head); @@ -958,8 +963,12 @@ static int mif6_add(struct net *net, struct mr6_table 
*mrt, } in6_dev = __in6_dev_get(dev); - if (in6_dev) + if (in6_dev) { in6_dev->cnf.mc_forwarding++; + inet6_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in6_dev->cnf); + } /* * Fill in the VIF structures @@ -1513,6 +1522,9 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) if (likely(mrt->mroute6_sk == NULL)) { mrt->mroute6_sk = sk; net->ipv6.devconf_all->mc_forwarding++; + inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else err = -EADDRINUSE; @@ -1535,6 +1547,10 @@ int ip6mr_sk_done(struct sock *sk) write_lock_bh(&mrt_lock); mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; + inet6_netconf_notify_devconf(net, + NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); mroute_clean_tables(mrt); -- cgit v1.1 From 70b386a0cc65041fb01aacf5d4b8d1fa49fc8ce9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:36 +0000 Subject: ip6mr: use nla_nest_* helpers This patch removes the skb manipulations when nested attributes are added by using standard helpers. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 1c05fe6..23f364a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2119,8 +2119,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, { int ct; struct rtnexthop *nhp; - u8 *b = skb_tail_pointer(skb); - struct rtattr *mp_head; + struct nlattr *mp_attr; /* If cache is unresolved, don't try to parse IIF and OIF */ if (c->mf6c_parent >= MAXMIFS) @@ -2129,28 +2128,29 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (MIF_EXISTS(mrt, c->mf6c_parent) && nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) return -EMSGSIZE; - - mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); + mp_attr = nla_nest_start(skb, RTA_MULTIPATH); + if (mp_attr == NULL) + return -EMSGSIZE; for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) - goto rtattr_failure; - nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); + if (nhp == NULL) { + nla_nest_cancel(skb, mp_attr); + return -EMSGSIZE; + } + nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } - mp_head->rta_type = RTA_MULTIPATH; - mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head; + + nla_nest_end(skb, mp_attr); + rtm->rtm_type = RTN_MULTICAST; return 1; - -rtattr_failure: - nlmsg_trim(skb, b); - return -EMSGSIZE; } int ip6mr_get_route(struct net *net, -- cgit v1.1 From adfa85e45dac616ff4f8bfceff1621ccafc0b1ff Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:37 +0000 Subject: ipmr/ip6mr: advertise mfc stats via rtnetlink These statistics can 
be checked only via /proc/net/ip_mr_cache or SIOCGETSGCNT[_IN6] and thus only for the table RT_TABLE_DEFAULT. Advertising them via rtnetlink allows to get statistics for all cache entries, whatever the table is. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 23f364a..4220a7b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2120,6 +2120,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, int ct; struct rtnexthop *nhp; struct nlattr *mp_attr; + struct rta_mfc_stats mfcs; /* If cache is unresolved, don't try to parse IIF and OIF */ if (c->mf6c_parent >= MAXMIFS) @@ -2149,6 +2150,12 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + mfcs.mfcs_packets = c->mfc_un.res.pkt; + mfcs.mfcs_bytes = c->mfc_un.res.bytes; + mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; + if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + return -EMSGSIZE; + rtm->rtm_type = RTN_MULTICAST; return 1; } -- cgit v1.1 From 9a68ac72a44ecb6d4dc4a7cadf45e1a2cd183885 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:38 +0000 Subject: ipmr/ip6mr: report origin of mfc entry into rtnl msg A mfc entry can be static or not (added via the mroute_sk socket). The patch reports MFC_STATIC flag into rtm_protocol by setting rtm_protocol to RTPROT_STATIC or RTPROT_MROUTED. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4220a7b..d51b911 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2249,7 +2249,10 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (nla_put_u32(skb, RTA_TABLE, mrt->id)) goto nla_put_failure; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = RTPROT_UNSPEC; + if (c->mfc_flags & MFC_STATIC) + rtm->rtm_protocol = RTPROT_STATIC; + else + rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || -- cgit v1.1 From 1eb99af52c4bc705f4042f37f255975acfc738f2 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:39 +0000 Subject: ipmr/ip6mr: allow to get unresolved cache via netlink /proc/net/ip[6]_mr_cache allows to get all mfc entries, even if they are put in the unresolved list (mfc[6]_unres_queue). But only the table RT_TABLE_DEFAULT is displayed. This patch adds the parsing of the unresolved list when the dump is made via rtnetlink, hence each table can be checked. In IPv6, we set rtm_type in ip6mr_fill_mroute(), because in case of unresolved mfc __ip6mr_fill_mroute() will not set it. In IPv4, it is already done. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv6/ip6mr.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d51b911..93a7698 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2235,6 +2235,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, { struct nlmsghdr *nlh; struct rtmsg *rtm; + int err; nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) @@ -2248,6 +2249,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, rtm->rtm_table = mrt->id; if (nla_put_u32(skb, RTA_TABLE, mrt->id)) goto nla_put_failure; + rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; if (c->mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; @@ -2258,7 +2260,9 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp)) goto nla_put_failure; - if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0) + err = __ip6mr_fill_mroute(mrt, skb, c, rtm); + /* do not break the dump if cache is unresolved */ + if (err < 0 && err != -ENOENT) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2301,6 +2305,22 @@ next_entry: } e = s_e = 0; } + spin_lock_bh(&mfc_unres_lock); + list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) { + if (e < s_e) + goto next_entry2; + if (ip6mr_fill_mroute(mrt, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + mfc) < 0) { + spin_unlock_bh(&mfc_unres_lock); + goto done; + } +next_entry2: + e++; + } + spin_unlock_bh(&mfc_unres_lock); + e = s_e = 0; s_h = 0; next_table: t++; -- cgit v1.1 From 812e44dd1829488096929ff362f749ae04dc71a0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:41 +0000 Subject: ip6mr: advertise new mfc entries via rtnl This patch allows to monitor mf6c activities via rtnetlink. 
To avoid parsing two times the mf6c oifs, we use maxvif to allocate the rtnl msg, thus we may allocate some superfluous space. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 93a7698..580e5e0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -116,6 +116,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); +static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, + int cmd); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); static void mroute_clean_tables(struct mr6_table *mrt); @@ -870,6 +872,7 @@ static void ipmr_do_expire_process(struct mr6_table *mrt) } list_del(&c->list); + mr6_netlink_event(mrt, c, RTM_DELROUTE); ip6mr_destroy_unres(mrt, c); } @@ -1220,6 +1223,7 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) atomic_inc(&mrt->cache_resolve_queue_len); list_add(&c->list, &mrt->mfc6_unres_queue); + mr6_netlink_event(mrt, c, RTM_NEWROUTE); ipmr_do_expire_process(mrt); } @@ -1257,6 +1261,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc) list_del(&c->list); write_unlock_bh(&mrt_lock); + mr6_netlink_event(mrt, c, RTM_DELROUTE); ip6mr_cache_free(c); return 0; } @@ -1421,6 +1426,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); + mr6_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } @@ -1465,6 +1471,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, ip6mr_cache_resolve(net, mrt, uc, c); ip6mr_cache_free(uc); } + mr6_netlink_event(mrt, c, RTM_NEWROUTE); 
return 0; } @@ -1498,6 +1505,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) list_del(&c->list); write_unlock_bh(&mrt_lock); + mr6_netlink_event(mrt, c, RTM_DELROUTE); ip6mr_cache_free(c); } } @@ -1506,6 +1514,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { list_del(&c->list); + mr6_netlink_event(mrt, c, RTM_DELROUTE); ip6mr_destroy_unres(mrt, c); } spin_unlock_bh(&mfc_unres_lock); @@ -2231,13 +2240,13 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc6_cache *c) + u32 portid, u32 seq, struct mfc6_cache *c, int cmd) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2272,6 +2281,52 @@ nla_put_failure: return -EMSGSIZE; } +static int mr6_msgsize(bool unresolved, int maxvif) +{ + size_t len = + NLMSG_ALIGN(sizeof(struct rtmsg)) + + nla_total_size(4) /* RTA_TABLE */ + + nla_total_size(sizeof(struct in6_addr)) /* RTA_SRC */ + + nla_total_size(sizeof(struct in6_addr)) /* RTA_DST */ + ; + + if (!unresolved) + len = len + + nla_total_size(4) /* RTA_IIF */ + + nla_total_size(0) /* RTA_MULTIPATH */ + + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) + /* RTA_MFC_STATS */ + + nla_total_size(sizeof(struct rta_mfc_stats)) + ; + + return len; +} + +static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, + int cmd) +{ + struct net *net = read_pnet(&mrt->net); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif), + GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + if (err < 0) + goto errout; + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, 
GFP_ATOMIC); + return; + +errout: + kfree_skb(skb); + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err); +} + static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -2298,7 +2353,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc) < 0) + mfc, RTM_NEWROUTE) < 0) goto done; next_entry: e++; @@ -2312,7 +2367,7 @@ next_entry: if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc) < 0) { + mfc, RTM_NEWROUTE) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } -- cgit v1.1 From 193c1e478cc496844fcbef402a10976c95a634ff Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:01:49 +0000 Subject: ip6mr: fix rtm_family of rtnl msg We talk about IPv6, hence the family is RTNL_FAMILY_IP6MR! rtnl_register() is already called with RTNL_FAMILY_IP6MR. The bug is here since the beginning of this function (commit 5b285cac3570). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 580e5e0..26dcdec 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2251,7 +2251,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, return -EMSGSIZE; rtm = nlmsg_data(nlh); - rtm->rtm_family = RTNL_FAMILY_IPMR; + rtm->rtm_family = RTNL_FAMILY_IP6MR; rtm->rtm_dst_len = 128; rtm->rtm_src_len = 128; rtm->rtm_tos = 0; -- cgit v1.1 From b1afce9538eef6239798ec54cead3c39d69724ca Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 4 Dec 2012 14:46:34 -0500 Subject: ipv6: Protect ->mc_forwarding access with CONFIG_IPV6_MROUTE Reported-by: Fengguang Wu Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 28e0e62..6fca01f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -469,8 +469,10 @@ static int inet6_netconf_msgsize_devconf(int type) /* type -1 is used for ALL */ if (type == -1 || type == NETCONFA_FORWARDING) size += nla_total_size(4); +#ifdef CONFIG_IPV6_MROUTE if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); +#endif return size; } @@ -498,11 +500,12 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((type == -1 || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; +#ifdef CONFIG_IPV6_MROUTE if ((type == -1 || type == NETCONFA_MC_FORWARDING) && nla_put_s32(skb, NETCONFA_MC_FORWARDING, devconf->mc_forwarding) < 0) goto nla_put_failure; - +#endif return nlmsg_end(skb, nlh); nla_put_failure: -- cgit v1.1 From 0e1efe9d5e10921f1e2152b108e013605fca3c9f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Dec 2012 09:18:10 +0000 Subject: ipv6: avoid taking locks at socket dismantle ipv6_sock_mc_close() is called for ipv6 sockets at close time, and most of them don't use multicast. Add a test to avoid contention on a shared spinlock. Same heuristic applies for ipv6_sock_ac_close(), to avoid contention on a shared rwlock. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/anycast.c | 3 +++ net/ipv6/mcast.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 2f4f584..757a810 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -189,6 +189,9 @@ void ipv6_sock_ac_close(struct sock *sk) struct net *net = sock_net(sk); int prev_index; + if (!np->ipv6_ac_list) + return; + write_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index b19ed51..28dfa5f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -284,6 +284,9 @@ void ipv6_sock_mc_close(struct sock *sk) struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); + if (!rcu_access_pointer(np->ipv6_mc_list)) + return; + spin_lock(&ipv6_sk_mc_lock); while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { -- cgit v1.1 From fd0ea7dbfae16015e72c4bbc6b1b43fffc3b914f Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 13 Dec 2012 02:40:26 +0900 Subject: ndisc: Unexport ndisc_{build,send}_skb(). These symbols were exported for bonding device by commit 305d552a ("bonding: send IPv6 neighbor advertisement on failover"). It became obsolete by commit 7c899432 ("bonding, ipv4, ipv6, vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS") and removed by commit 4f5762ec ("bonding: Remove obsolete source file 'bond_ipv6.c'"). Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/ipv6/ndisc.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index cf43b65..4c02e6a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -370,12 +370,12 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -struct sk_buff *ndisc_build_skb(struct net_device *dev, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, - const struct in6_addr *target, - int llinfo) +static struct sk_buff *ndisc_build_skb(struct net_device *dev, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, + const struct in6_addr *target, + int llinfo) { struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; @@ -431,14 +431,11 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, return skb; } -EXPORT_SYMBOL(ndisc_build_skb); - -void ndisc_send_skb(struct sk_buff *skb, - struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h) +static void ndisc_send_skb(struct sk_buff *skb, struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h) { struct flowi6 fl6; struct dst_entry *dst; @@ -473,8 +470,6 @@ void ndisc_send_skb(struct sk_buff *skb, rcu_read_unlock(); } -EXPORT_SYMBOL(ndisc_send_skb); - /* * Send a Neighbour Discover packet */ -- cgit v1.1