diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2014-03-14 20:51:52 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-03-17 15:48:12 -0400 |
commit | 9c62a68d13119a1ca9718381d97b0cb415ff4e9d (patch) | |
tree | aae7c0ffaa451dc44afbbba9c37780e9c12bd1fc /net/core | |
parent | e1bd4d3d7dd2a4a0e731ffe07c439927c23f16ea (diff) | |
download | op-kernel-dev-9c62a68d13119a1ca9718381d97b0cb415ff4e9d.zip op-kernel-dev-9c62a68d13119a1ca9718381d97b0cb415ff4e9d.tar.gz |
netpoll: Remove dead packet receive code (CONFIG_NETPOLL_TRAP)
The netpoll packet receive code only becomes active if the netpoll
rx_skb_hook is implemented, and there is not a single implementation
of the netpoll rx_skb_hook in the kernel.
All of the out-of-tree implementations I have found call
netpoll_poll which was removed from the kernel in 2011, so this
change should not add any additional breakage.
There are problems with the netpoll packet receive code. __netpoll_rx
does not call dev_kfree_skb_irq or dev_kfree_skb_any in hard irq
context. netpoll_neigh_reply leaks every skb it receives. Reception
of packets does not work successfully on stacked devices (aka bonding,
team, bridge, and vlans).
Given that the netpoll packet receive code is buggy, there are no
out of tree users that will be merged soon, and the code has
not been used in tree for a decade, let's just remove it.
Reverting this commit can serve as a starting point for anyone
who wants to resurrect netpoll packet reception support.
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 11 | ||||
-rw-r--r-- | net/core/netpoll.c | 520 |
2 files changed, 2 insertions, 529 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 587f9fb..55f8e64 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3231,10 +3231,6 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - /* if netpoll wants it, pretend we never saw it */ - if (netpoll_rx(skb)) - return NET_RX_DROP; - net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); @@ -3520,10 +3516,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) trace_netif_receive_skb(skb); - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) - goto out; - orig_dev = skb->dev; skb_reset_network_header(skb); @@ -3650,7 +3642,6 @@ drop: unlock: rcu_read_unlock(); -out: return ret; } @@ -3875,7 +3866,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff int same_flow; enum gro_result ret; - if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) + if (!(skb->dev->features & NETIF_F_GRO)) goto normal; if (skb_is_gso(skb) || skb_has_frag_list(skb)) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index eed8b1d..7291dde 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -46,11 +46,6 @@ static struct sk_buff_head skb_pool; -#ifdef CONFIG_NETPOLL_TRAP -static atomic_t trapped; -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); -#endif - DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 @@ -109,27 +104,6 @@ static void queue_process(struct work_struct *work) } } -#ifdef CONFIG_NETPOLL_TRAP -static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, __be32 saddr, __be32 daddr) -{ - __wsum psum; - - if (uh->check == 0 || skb_csum_unnecessary(skb)) - return 0; - - psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - - skb->csum = psum; - - return __skb_checksum_complete(skb); -} -#endif /* 
CONFIG_NETPOLL_TRAP */ - /* * Check whether delayed processing was scheduled for our NIC. If so, * we attempt to grab the poll lock and use ->poll() to pump the card. @@ -140,11 +114,6 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, * trylock here and interrupts are already disabled in the softirq * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. - * - * In cases where there is bi-directional communications, reading only - * one message at a time can lead to packets being dropped by the - * network adapter, forcing superfluous retries and possibly timeouts. - * Thus, we set our budget to greater than 1. */ static int poll_one_napi(struct napi_struct *napi, int budget) { @@ -181,38 +150,11 @@ static void poll_napi(struct net_device *dev, int budget) } } -#ifdef CONFIG_NETPOLL_TRAP -static void service_neigh_queue(struct net_device *dev, - struct netpoll_info *npi) -{ - struct sk_buff *skb; - if (dev->flags & IFF_SLAVE) { - struct net_device *bond_dev; - struct netpoll_info *bond_ni; - - bond_dev = netdev_master_upper_dev_get_rcu(dev); - bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&npi->neigh_tx))) { - skb->dev = bond_dev; - skb_queue_tail(&bond_ni->neigh_tx, skb); - } - } - while ((skb = skb_dequeue(&npi->neigh_tx))) - netpoll_neigh_reply(skb, npi); -} -#else /* !CONFIG_NETPOLL_TRAP */ -static inline void service_neigh_queue(struct net_device *dev, - struct netpoll_info *npi) -{ -} -#endif /* CONFIG_NETPOLL_TRAP */ - static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - bool rx_processing = netpoll_rx_processing(ni); - int budget = rx_processing? 
16 : 0; + int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -226,9 +168,6 @@ static void netpoll_poll_dev(struct net_device *dev) return; } - if (rx_processing) - netpoll_set_trap(1); - ops = dev->netdev_ops; if (!ops->ndo_poll_controller) { up(&ni->dev_lock); @@ -240,13 +179,8 @@ static void netpoll_poll_dev(struct net_device *dev) poll_napi(dev, budget); - if (rx_processing) - netpoll_set_trap(0); - up(&ni->dev_lock); - service_neigh_queue(dev, ni); - zap_completion_queue(); } @@ -531,434 +465,6 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -#ifdef CONFIG_NETPOLL_TRAP -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int size, type = ARPOP_REPLY; - __be32 sip, tip; - unsigned char *sha; - struct sk_buff *send_skb; - struct netpoll *np, *tmp; - unsigned long flags; - int hlen, tlen; - int hits = 0, proto; - - if (!netpoll_rx_processing(npinfo)) - return; - - /* Before checking the packet, we do some early - inspection whether this is interesting at all */ - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->dev == skb->dev) - hits++; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - - /* No netpoll struct is using this dev */ - if (!hits) - return; - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_ARP) { - struct arphdr *arp; - unsigned char *arp_ptr; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); - - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; - - arp_ptr = (unsigned char *)(arp+1); - /* save 
the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; - - size = arp_hdr_len(skb->dev); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip.ip) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); - - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). 
- */ - - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); - - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } else if( proto == ETH_P_IPV6) { -#if IS_ENABLED(CONFIG_IPV6) - struct nd_msg *msg; - u8 *lladdr = NULL; - struct ipv6hdr *hdr; - struct icmp6hdr *icmp6h; - const struct in6_addr *saddr; - const struct in6_addr *daddr; - struct inet6_dev *in6_dev = NULL; - struct in6_addr *target; - - in6_dev = in6_dev_get(skb->dev); - if (!in6_dev || !in6_dev->cnf.accept_ra) - return; - - if (!pskb_may_pull(skb, skb->len)) - return; - - msg = (struct nd_msg *)skb_transport_header(skb); - - __skb_push(skb, skb->data - skb_transport_header(skb)); - - if (ipv6_hdr(skb)->hop_limit != 255) - return; - if (msg->icmph.icmp6_code != 0) - return; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return; - - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - - size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - send_skb->protocol = htons(ETH_P_IPV6); - send_skb->dev = skb->dev; - - skb_reset_network_header(send_skb); - hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); - 
*(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); - hdr->nexthdr = IPPROTO_ICMPV6; - hdr->hop_limit = 255; - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); - icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; - icmp6h->icmp6_router = 0; - icmp6h->icmp6_solicited = 1; - - target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); - *target = msg->target; - icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, - IPPROTO_ICMPV6, - csum_partial(icmp6h, - size, 0)); - - if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, - lladdr, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address, we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); -#endif - } -} - -static bool pkt_is_ns(struct sk_buff *skb) -{ - struct nd_msg *msg; - struct ipv6hdr *hdr; - - if (skb->protocol != htons(ETH_P_ARP)) - return false; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) - return false; - - msg = (struct nd_msg *)skb_transport_header(skb); - __skb_push(skb, skb->data - skb_transport_header(skb)); - hdr = ipv6_hdr(skb); - - if (hdr->nexthdr != IPPROTO_ICMPV6) - return false; - if (hdr->hop_limit != 255) - return false; - if (msg->icmph.icmp6_code != 0) - return false; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return false; - - return true; -} - -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int proto, len, ulen, data_len; - int hits = 0, offset; - const struct iphdr *iph; - struct udphdr *uh; - struct netpoll *np, *tmp; - uint16_t source; - - if (!netpoll_rx_processing(npinfo)) - goto out; - - if (skb->dev->type != ARPHRD_ETHER) - goto out; - - /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && 
netpoll_trap()) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } else if (pkt_is_ns(skb) && netpoll_trap()) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } - - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - skb = vlan_untag(skb); - if (unlikely(!skb)) - goto out; - } - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP && proto != ETH_P_IPV6) - goto out; - if (skb->pkt_type == PACKET_OTHERHOST) - goto out; - if (skb_shared(skb)) - goto out; - - if (proto == ETH_P_IP) { - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; - - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. 
- */ - if (pskb_trim_rcsum(skb, len)) - goto out; - - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; - - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip.ip && np->local_ip.ip != iph->daddr) - continue; - if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } - } else { -#if IS_ENABLED(CONFIG_IPV6) - const struct ipv6hdr *ip6h; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - ip6h = (struct ipv6hdr *)skb->data; - if (ip6h->version != 6) - goto out; - len = ntohs(ip6h->payload_len); - if (!len) - goto out; - if (len + sizeof(struct ipv6hdr) > skb->len) - goto out; - if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) - goto out; - ip6h = ipv6_hdr(skb); - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - uh = udp_hdr(skb); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - if (ulen != skb->len) - goto out; - if (udp6_csum_init(skb, uh, IPPROTO_UDP)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) - continue; - if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } -#endif - } - - if (!hits) - goto out; - - kfree_skb(skb); - return 1; - -out: - if (netpoll_trap()) { - kfree_skb(skb); - return 1; - } 
- - return 0; -} - -static void netpoll_trap_setup_info(struct netpoll_info *npinfo) -{ - INIT_LIST_HEAD(&npinfo->rx_np); - spin_lock_init(&npinfo->rx_lock); - skb_queue_head_init(&npinfo->neigh_tx); -} - -static void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) -{ - skb_queue_purge(&npinfo->neigh_tx); -} - -static void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) -{ - unsigned long flags; - if (np->rx_skb_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } -} - -static void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) -{ - unsigned long flags; - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } -} - -#else /* !CONFIG_NETPOLL_TRAP */ -static inline void netpoll_trap_setup_info(struct netpoll_info *npinfo) -{ -} -static inline void netpoll_trap_cleanup_info(struct netpoll_info *npinfo) -{ -} -static inline -void netpoll_trap_setup(struct netpoll *np, struct netpoll_info *npinfo) -{ -} -static inline -void netpoll_trap_cleanup(struct netpoll *np, struct netpoll_info *npinfo) -{ -} -#endif /* CONFIG_NETPOLL_TRAP */ - void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); @@ -1103,8 +609,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) goto out; } - netpoll_trap_setup_info(npinfo); - sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1124,8 +628,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - netpoll_trap_setup(np, npinfo); - /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -1274,7 +776,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head 
*rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - netpoll_trap_cleanup_info(npinfo); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -1299,8 +800,6 @@ void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; - netpoll_trap_cleanup(np, npinfo); - synchronize_srcu(&netpoll_srcu); if (atomic_dec_and_test(&npinfo->refcnt)) { @@ -1344,20 +843,3 @@ out: rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); - -#ifdef CONFIG_NETPOLL_TRAP -int netpoll_trap(void) -{ - return atomic_read(&trapped); -} -EXPORT_SYMBOL(netpoll_trap); - -void netpoll_set_trap(int trap) -{ - if (trap) - atomic_inc(&trapped); - else - atomic_dec(&trapped); -} -EXPORT_SYMBOL(netpoll_set_trap); -#endif /* CONFIG_NETPOLL_TRAP */ |