diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/8021q/vlan_core.c | 10 | ||||
-rw-r--r-- | net/core/dev.c | 59 | ||||
-rw-r--r-- | net/core/neighbour.c | 6 | ||||
-rw-r--r-- | net/core/skbuff.c | 47 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 3 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 2 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 4 | ||||
-rw-r--r-- | net/ipv4/route.c | 146 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 1 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 22 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_xmit.c | 6 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 29 |
14 files changed, 177 insertions, 164 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index add69d0..fbbf1fa 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -5,7 +5,7 @@ #include <linux/export.h> #include "vlan.h" -bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) +bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; @@ -13,14 +13,8 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); - if (!vlan_dev) { - /* Only the last call to vlan_do_receive() should change - * pkt_type to PACKET_OTHERHOST - */ - if (vlan_id && last_handler) - skb->pkt_type = PACKET_OTHERHOST; + if (!vlan_dev) return false; - } skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) diff --git a/net/core/dev.c b/net/core/dev.c index 1e0a184..09cb3f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3300,18 +3300,18 @@ ncls: && !skb_pfmemalloc_protocol(skb)) goto drop; - rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_do_receive(&skb, !rx_handler)) + if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) goto unlock; } + rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); @@ -3331,6 +3331,9 @@ ncls: } } + if (vlan_tx_nonzero_tag_present(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; @@ -3471,17 +3474,31 @@ out: return netif_receive_skb(skb); } -inline void napi_gro_flush(struct napi_struct *napi) +/* napi->gro_list contains packets ordered by age. + * youngest packets at the head of it. + * Complete skbs in reverse order to reduce latencies. + */ +void napi_gro_flush(struct napi_struct *napi, bool flush_old) { - struct sk_buff *skb, *next; + struct sk_buff *skb, *prev = NULL; - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; + /* scan list and build reverse chain */ + for (skb = napi->gro_list; skb != NULL; skb = skb->next) { + skb->prev = prev; + prev = skb; + } + + for (skb = prev; skb; skb = prev) { skb->next = NULL; + + if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) + return; + + prev = skb->prev; napi_gro_complete(skb); + napi->gro_count--; } - napi->gro_count = 0; napi->gro_list = NULL; } EXPORT_SYMBOL(napi_gro_flush); @@ -3542,6 +3559,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) napi->gro_count++; NAPI_GRO_CB(skb)->count = 1; + NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; @@ -3631,20 +3649,22 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) } EXPORT_SYMBOL(napi_skb_finish); -void skb_gro_reset_offset(struct sk_buff *skb) +static void skb_gro_reset_offset(struct sk_buff *skb) { + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + NAPI_GRO_CB(skb)->data_offset = 0; NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { - NAPI_GRO_CB(skb)->frag0 = - skb_frag_address(&skb_shinfo(skb)->frags[0]); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]); + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); } } -EXPORT_SYMBOL(skb_gro_reset_offset); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { @@ -3876,7 +3896,7 @@ void napi_complete(struct napi_struct *n) if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) return; - napi_gro_flush(n); + napi_gro_flush(n, false); local_irq_save(flags); __napi_complete(n); local_irq_restore(flags); @@ -3981,8 +4001,17 @@ static void net_rx_action(struct softirq_action *h) local_irq_enable(); napi_complete(n); local_irq_disable(); - } else + } else { + if (n->gro_list) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + local_irq_enable(); + napi_gro_flush(n, HZ >= 1000); + local_irq_disable(); + } list_move_tail(&n->poll_list, &sd->poll_list); + } } netpoll_poll_unlock(have); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index baca771..2257148 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1301,8 +1301,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) if (!dst) goto discard; - __skb_pull(skb, skb_network_offset(skb)); - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; @@ -1312,6 +1310,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) neigh_hh_init(neigh, dst); do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); @@ -1342,9 +1341,8 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) unsigned int seq; int err; - __skb_pull(skb, skb_network_offset(skb)); - do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cdc2859..6e04b1f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -655,53 +655,6 @@ void consume_skb(struct sk_buff *skb) } EXPORT_SYMBOL(consume_skb); -/** - * skb_recycle - clean up an skb for reuse - * @skb: buffer - * - * Recycles the skb to be reused as a receive buffer. This - * function does any necessary reference count dropping, and - * cleans up the skbuff as if it just came from __alloc_skb(). - */ -void skb_recycle(struct sk_buff *skb) -{ - struct skb_shared_info *shinfo; - - skb_release_head_state(skb); - - shinfo = skb_shinfo(skb); - memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); - atomic_set(&shinfo->dataref, 1); - - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = skb->head + NET_SKB_PAD; - skb_reset_tail_pointer(skb); -} -EXPORT_SYMBOL(skb_recycle); - -/** - * skb_recycle_check - check if skb can be reused for receive - * @skb: buffer - * @skb_size: minimum receive buffer size - * - * Checks that the skb passed in is not shared or cloned, and - * that it is linear and its head portion at least as large as - * skb_size so that it can be recycled as a receive buffer. - * If these conditions are met, this function does any necessary - * reference count dropping and cleans up the skbuff as if it - * just came from __alloc_skb(). - */ -bool skb_recycle_check(struct sk_buff *skb, int skb_size) -{ - if (!skb_is_recycleable(skb, skb_size)) - return false; - - skb_recycle(skb); - - return true; -} -EXPORT_SYMBOL(skb_recycle_check); - static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 68c93d1..825c608 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -322,7 +322,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (!r && !fib_num_tclassid_users(dev_net(dev))) { + if (!r && !fib_num_tclassid_users(dev_net(dev)) && + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { *itag = 0; return 0; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2677530..71b125c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -840,6 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { nexthop_nh->nh_parent = fi; nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); + if (!nexthop_nh->nh_pcpu_rth_output) + goto failure; } endfor_nexthops(fi) if (cfg->fc_mx) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f0c5b9c..d34ce29 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -406,7 +406,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; return &rt->dst; @@ -442,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; rcu_read_unlock(); return &rt->dst; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index ab09b12..694de3b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) + if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 24a29a3..6537a40 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -193,7 +193,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; + nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); @@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ff62206..1a0da8d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -802,7 +802,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) net = dev_net(rt->dst.dev); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); if (!peer) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, + rt_nexthop(rt, ip_hdr(skb)->daddr)); return; } @@ -827,7 +828,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->rate_tokens)))) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); + + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE @@ -835,7 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), - &ip_hdr(skb)->daddr, &rt->rt_gateway); + &ip_hdr(skb)->daddr, &gw); #endif } out_put_peer: @@ -904,22 +907,32 @@ out: kfree_skb(skb); return 0; } -static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) +static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { + struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst->dev->mtu < mtu) + return; + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (!rt->rt_pmtu) { + dst->obsolete = DST_OBSOLETE_KILL; + } else { + rt->rt_pmtu = mtu; + dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); + } + rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); - return mtu; } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -929,14 +942,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - mtu = __ip_rt_update_pmtu(rt, &fl4, mtu); - - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); - } + __ip_rt_update_pmtu(rt, &fl4, mtu); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -1120,7 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - if (rt->rt_gateway && mtu > 576) + if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } @@ -1171,7 +1177,9 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; rt->rt_gateway = fnhe->fnhe_gw; - } + rt->rt_uses_gateway = 1; + } else if (!rt->rt_gateway) + rt->rt_gateway = daddr; orig = rcu_dereference(fnhe->fnhe_rth); rcu_assign_pointer(fnhe->fnhe_rth, rt); @@ -1180,13 +1188,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, fnhe->fnhe_stamp = jiffies; ret = true; - } else { - /* Routes we intend to cache in nexthop exception have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ - rt->dst.flags |= DST_NOCACHE; } spin_unlock_bh(&fnhe_lock); @@ -1201,8 +1202,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; } else { - if (!nh->nh_pcpu_rth_output) - goto nocache; p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); } orig = *p; @@ -1211,16 +1210,8 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (prev == orig) { if (orig) rt_free(orig); - } else { - /* Routes we intend to cache in the FIB nexthop have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ -nocache: - rt->dst.flags |= DST_NOCACHE; + } else ret = false; - } return ret; } @@ -1281,8 +1272,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { rt->rt_gateway = nh->nh_gw; + rt->rt_uses_gateway = 1; + } dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; @@ -1291,8 +1284,18 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) cached = rt_cache_route(nh, rt); - } - if (unlikely(!cached)) + if (unlikely(!cached)) { + /* Routes we intend to cache in nexthop exception or + * FIB nexthop have the DST_NOCACHE bit clear. + * However, if we are unsuccessful at storing this + * route into the cache we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; + if (!rt->rt_gateway) + rt->rt_gateway = daddr; + rt_add_uncached_list(rt); + } + } else rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1360,6 +1363,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; @@ -1429,7 +1433,6 @@ static int __mkroute_input(struct sk_buff *skb, return -EINVAL; } - err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { @@ -1439,10 +1442,13 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - if (out_dev == in_dev && err && + do_cache = res->fi && !itag; + if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { flags |= RTCF_DOREDIRECT; + do_cache = false; + } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -1459,15 +1465,11 @@ static int __mkroute_input(struct sk_buff *skb, } } - do_cache = false; - if (res->fi) { - if (!itag) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); - if (rt_cache_valid(rth)) { - skb_dst_set_noref(skb, &rth->dst); - goto out; - } - do_cache = true; + if (do_cache) { + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + goto out; } } @@ -1486,6 +1488,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; @@ -1656,6 +1659,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1758,6 +1762,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct in_device *in_dev; u16 type = res->type; struct rtable *rth; + bool do_cache; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) @@ -1794,24 +1799,36 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } fnhe = NULL; + do_cache = fi != NULL; if (fi) { struct rtable __rcu **prth; + struct fib_nh *nh = &FIB_RES_NH(*res); - fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + fnhe = find_exception(nh, fl4->daddr); if (fnhe) prth = &fnhe->fnhe_rth; - else - prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + else { + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = __this_cpu_ptr(nh->nh_pcpu_rth_output); + } rth = rcu_dereference(*prth); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; } } + +add: rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi); + do_cache); if (!rth) return ERR_PTR(-ENOBUFS); @@ -1824,6 +1841,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -2102,6 +2120,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + rt->rt_uses_gateway = ort->rt_uses_gateway; INIT_LIST_HEAD(&rt->rt_uncached); @@ -2180,12 +2199,22 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_gateway && + if (rt->rt_uses_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; + expires = rt->dst.expires; + if (expires) { + unsigned long now = jiffies; + + if (time_before(now, expires)) + expires -= now; + else + expires = 0; + } + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt_pmtu) + if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; @@ -2195,13 +2224,6 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, goto nla_put_failure; error = rt->dst.error; - expires = rt->dst.expires; - if (expires) { - if (time_before(jiffies, expires)) - expires -= jiffies; - else - expires = 0; - } if (rt_is_input_route(rt)) { if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 681ea2f..05c5ab8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,6 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; + xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e22e6d8..a974247 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -822,13 +822,6 @@ out: return segs; } -struct ipv6_gro_cb { - struct napi_gro_cb napi; - int proto; -}; - -#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb) - static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -874,28 +867,31 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, iph = ipv6_hdr(skb); } - IPV6_GRO_CB(skb)->proto = proto; + NAPI_GRO_CB(skb)->proto = proto; flush--; nlen = skb_network_header_len(skb); for (p = *head; p; p = p->next) { - struct ipv6hdr *iph2; + const struct ipv6hdr *iph2; + __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */ if (!NAPI_GRO_CB(p)->same_flow) continue; iph2 = ipv6_hdr(p); + first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - /* All fields must match except length. */ + /* All fields must match except length and Traffic Class. */ if (nlen != skb_network_header_len(p) || - memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) || + (first_word & htonl(0xF00FFFFF)) || memcmp(&iph->nexthdr, &iph2->nexthdr, nlen - offsetof(struct ipv6hdr, nexthdr))) { NAPI_GRO_CB(p)->same_flow = 0; continue; } - + /* flush if Traffic Class fields are different */ + NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; } @@ -927,7 +923,7 @@ static int ipv6_gro_complete(struct sk_buff *skb) sizeof(*iph)); rcu_read_lock(); - ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]); + ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]); if (WARN_ON(!ops || !ops->gro_complete)) goto out_unlock; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 56f6d5d..cc4c809 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -50,6 +50,7 @@ enum { * local */ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ + IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ }; /* @@ -113,6 +114,8 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_tos = rtos; + fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? + FLOWI_FLAG_KNOWN_NH : 0; retry: rt = ip_route_output_key(net, &fl4); @@ -1061,7 +1064,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL, NULL))) + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_KNOWN_NH, NULL))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0f2e3ad..01e944a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -169,6 +169,8 @@ static void netlink_sock_destruct(struct sock *sk) if (nlk->cb) { if (nlk->cb->done) nlk->cb->done(nlk->cb); + + module_put(nlk->cb->module); netlink_destroy_callback(nlk->cb); } @@ -1758,6 +1760,7 @@ static int netlink_dump(struct sock *sk) nlk->cb = NULL; mutex_unlock(nlk->cb_mutex); + module_put(cb->module); netlink_consume_callback(cb); return 0; @@ -1767,9 +1770,9 @@ errout_skb: return err; } -int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - struct netlink_dump_control *control) +int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *control) { struct netlink_callback *cb; struct sock *sk; @@ -1784,6 +1787,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->done = control->done; cb->nlh = nlh; cb->data = control->data; + cb->module = control->module; cb->min_dump_alloc = control->min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; @@ -1794,19 +1798,28 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, return -ECONNREFUSED; } nlk = nlk_sk(sk); - /* A dump is in progress... */ + mutex_lock(nlk->cb_mutex); + /* A dump is in progress... */ if (nlk->cb) { mutex_unlock(nlk->cb_mutex); netlink_destroy_callback(cb); - sock_put(sk); - return -EBUSY; + ret = -EBUSY; + goto out; } + /* add reference of module which cb->dump belongs to */ + if (!try_module_get(cb->module)) { + mutex_unlock(nlk->cb_mutex); + netlink_destroy_callback(cb); + ret = -EPROTONOSUPPORT; + goto out; + } + nlk->cb = cb; mutex_unlock(nlk->cb_mutex); ret = netlink_dump(sk); - +out: sock_put(sk); if (ret) @@ -1817,7 +1830,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, */ return -EINTR; } -EXPORT_SYMBOL(netlink_dump_start); +EXPORT_SYMBOL(__netlink_dump_start); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) { |