diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/Kconfig | 1 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 7 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 3 | ||||
-rw-r--r-- | net/ipv6/exthdrs_core.c | 6 | ||||
-rw-r--r-- | net/ipv6/ip6_flowlabel.c | 5 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 3 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 6 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 2 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 3 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 74 | ||||
-rw-r--r-- | net/ipv6/route.c | 7 | ||||
-rw-r--r-- | net/ipv6/sit.c | 4 | ||||
-rw-r--r-- | net/ipv6/syncookies.c | 3 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 42 | ||||
-rw-r--r-- | net/ipv6/udp.c | 38 |
15 files changed, 151 insertions, 53 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index bb7dabe..40c8975 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -69,6 +69,7 @@ config INET6_ESP select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES + select CRYPTO_ECHAINIV ---help--- Support for IPsec ESP. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 38eedde..bdd7eac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -583,7 +583,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; @@ -3538,6 +3538,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; + bool notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3583,7 +3584,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ - ipv6_ifa_notify(RTM_NEWADDR, ifp); + notify = true; } } @@ -3591,6 +3592,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) out: spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); + if (notify) + ipv6_ifa_notify(RTM_NEWADDR, ifp); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 517c55b..4281621 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -162,6 +162,9 @@ ipv4_connected: fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 5c5d23e..9508a20 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, *fragoff = _frag_off; return hp->nexthdr; } - return -ENOENT; + if (!found) + return -ENOENT; + if (fragoff) + *fragoff = _frag_off; + break; } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1f9ebe3..dc2db4f 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference(*sflp)) != NULL; + (sfl = rcu_dereference_protected(*sflp, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = rcu_dereference(sfl->next); + *sflp = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree_rcu(sfl, rcu); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f37f18b..c0d4dc1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -777,6 +777,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) __u32 mtu; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; @@ -1512,6 +1514,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->destructor = ip6gre_dev_free; dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; } static int ip6gre_newlink(struct net *src_net, struct net_device *dev, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 23de98f..a163102 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct rt6_info *rt; #endif int err; + int flags = 0; /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, @@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, dst_release(*dst); *dst = NULL; } + + if (fl6->flowi6_oif) + flags |= RT6_LOOKUP_F_IFACE; } if (!*dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output_flags(net, sk, fl6, flags); err = (*dst)->error; if (err) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 137fca4..6c5dfec 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1180,6 +1180,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + tproto = ACCESS_ONCE(t->parms.proto); if (tproto != IPPROTO_IPIP && tproto != 0) return -1; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ee56d0..d64ee7e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) return NULL; skb->priority = TC_PRIO_CONTROL; - skb->reserved_tailroom = skb_end_offset(skb) - - min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); + skb_tailroom_reserve(skb, mtu, tlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 31ba7ca..051b6a6 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -21,6 +21,10 @@ #include <net/ipv6.h> #include <net/netfilter/ipv6/nf_nat_masquerade.h> +#define MAX_WORK_COUNT 16 + +static atomic_t v6_worker_count; + unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, const struct net_device *out) @@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = { .notifier_call = masq_device_event, }; +struct masq_dev_work { + struct work_struct work; + struct net *net; + int ifindex; +}; + +static void iterate_cleanup_work(struct work_struct *work) +{ + struct masq_dev_work *w; + long index; + + w = container_of(work, struct masq_dev_work, work); + + index = w->ifindex; + nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0); + + put_net(w->net); + kfree(w); + atomic_dec(&v6_worker_count); + module_put(THIS_MODULE); +} + +/* ipv6 inet notifier is an atomic notifier, i.e. we cannot + * schedule. + * + * Unfortunately, nf_ct_iterate_cleanup can run for a long + * time if there are lots of conntracks and the system + * handles high softirq load, so it frequently calls cond_resched + * while iterating the conntrack table. + * + * So we defer nf_ct_iterate_cleanup walk to the system workqueue. + * + * As we can have 'a lot' of inet_events (depending on amount + * of ipv6 addresses being deleted), we also need to add an upper + * limit to the number of queued work items. + */ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; - struct netdev_notifier_info info; + const struct net_device *dev; + struct masq_dev_work *w; + struct net *net; + + if (event != NETDEV_DOWN || + atomic_read(&v6_worker_count) >= MAX_WORK_COUNT) + return NOTIFY_DONE; + + dev = ifa->idev->dev; + net = maybe_get_net(dev_net(dev)); + if (!net) + return NOTIFY_DONE; - netdev_notifier_info_init(&info, ifa->idev->dev); - return masq_device_event(this, event, &info); + if (!try_module_get(THIS_MODULE)) + goto err_module; + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (w) { + atomic_inc(&v6_worker_count); + + INIT_WORK(&w->work, iterate_cleanup_work); + w->ifindex = dev->ifindex; + w->net = net; + schedule_work(&w->work); + + return NOTIFY_DONE; + } + + module_put(THIS_MODULE); + err_module: + put_net(net); + return NOTIFY_DONE; } static struct notifier_block masq_inet_notifier = { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3c8834b..ed44663 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1183,11 +1183,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6) +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags) { struct dst_entry *dst; - int flags = 0; bool any_src; dst = l3mdev_rt6_dst_by_oif(net, fl6); @@ -1208,7 +1207,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } -EXPORT_SYMBOL(ip6_route_output); +EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e794ef6..2066d1c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -201,14 +201,14 @@ static int ipip6_tunnel_create(struct net_device *dev) if ((__force u16)t->parms.i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; + dev->rtnl_link_ops = &sit_link_ops; + err = register_netdevice(dev); if (err < 0) goto out; ipip6_tunnel_clone_6rd(dev, sitn); - dev->rtnl_link_ops = &sit_link_ops; - dev_hold(dev); ipip6_tunnel_link(sitn, t); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2906ef2..aae3e5c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -41,8 +41,7 @@ static __u16 const msstab[] = { 9000 - 60, }; -static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], - ipv6_cookie_scratch); +static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, u32 count, int c) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 006396e..3447859 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -66,7 +66,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/crypto.h> +#include <crypto/hash.h> #include <linux/scatterlist.h> static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); @@ -327,6 +327,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; + bool fatal; int err; sk = __inet6_lookup_established(net, &tcp_hashinfo, @@ -345,8 +346,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } seq = ntohl(th->seq); + fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, fatal); bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -400,7 +402,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { @@ -540,7 +541,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, bp->len = cpu_to_be32(nbytes); sg_init_one(&sg, bp, sizeof(*bp)); - return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->md5_req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, @@ -548,14 +550,14 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; - struct hash_desc *desc; + struct ahash_request *req; hp = tcp_get_md5sig_pool(); if (!hp) goto clear_hash_noput; - desc = &hp->md5_desc; + req = hp->md5_req; - if (crypto_hash_init(desc)) + if (crypto_ahash_init(req)) goto clear_hash; if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) goto clear_hash; @@ -563,7 +565,8 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; - if (crypto_hash_final(desc, md5_hash)) + ahash_request_set_crypt(req, NULL, md5_hash, 0); + if (crypto_ahash_final(req)) goto clear_hash; tcp_put_md5sig_pool(); @@ -583,7 +586,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, { const struct in6_addr *saddr, *daddr; struct tcp_md5sig_pool *hp; - struct hash_desc *desc; + struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); if (sk) { /* valid for establish/request sockets */ @@ -598,9 +601,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, hp = tcp_get_md5sig_pool(); if (!hp) goto clear_hash_noput; - desc = &hp->md5_desc; + req = hp->md5_req; - if (crypto_hash_init(desc)) + if (crypto_ahash_init(req)) goto clear_hash; if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) @@ -611,7 +614,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; - if (crypto_hash_final(desc, md5_hash)) + ahash_request_set_crypt(req, NULL, md5_hash, 0); + if (crypto_ahash_final(req)) goto clear_hash; tcp_put_md5sig_pool(); @@ -1386,7 +1390,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); @@ -1394,24 +1398,24 @@ process: reqsk_put(req); goto discard_it; } - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); tcp_v6_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5d2c2af..422dd01 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -257,6 +257,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, struct sock *sk, *result; struct hlist_nulls_node *node; int score, badness, matches = 0, reuseport = 0; + bool select_ok = true; u32 hash = 0; begin: @@ -270,14 +271,18 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (sk2) { - result = sk2; - goto found; + if (select_ok) { + struct sock *sk2; + + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (sk2) { + result = sk2; + select_ok = false; + goto found; + } } matches = 1; } @@ -321,6 +326,7 @@ struct sock *__udp6_lib_lookup(struct net *net, unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness, matches = 0, reuseport = 0; + bool select_ok = true; u32 hash = 0; rcu_read_lock(); @@ -358,14 +364,18 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { - struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash, skb, + if (select_ok) { + struct sock *sk2; + + sk2 = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (sk2) { - result = sk2; - goto found; + if (sk2) { + result = sk2; + select_ok = false; + goto found; + } } matches = 1; } @@ -952,11 +962,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ + /* a return value > 0 means to resubmit the input */ if (ret > 0) - return -ret; + return ret; return 0; } |