diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-20 17:43:29 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-20 17:43:29 -0700 |
commit | db6d8c7a4027b48d797b369a53f8470aaeed7063 (patch) | |
tree | e140c104a89abc2154e1f41a7db8ebecbb6fa0b4 /net/core | |
parent | 3a533374283aea50eab3976d8a6d30532175f009 (diff) | |
parent | fb65a7c091529bfffb1262515252c0d0f6241c5c (diff) | |
download | op-kernel-dev-db6d8c7a4027b48d797b369a53f8470aaeed7063.zip op-kernel-dev-db6d8c7a4027b48d797b369a53f8470aaeed7063.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (1232 commits)
iucv: Fix bad merging.
net_sched: Add size table for qdiscs
net_sched: Add accessor function for packet length for qdiscs
net_sched: Add qdisc_enqueue wrapper
highmem: Export totalhigh_pages.
ipv6 mcast: Omit redundant address family checks in ip6_mc_source().
net: Use standard structures for generic socket address structures.
ipv6 netns: Make several "global" sysctl variables namespace aware.
netns: Use net_eq() to compare net-namespaces for optimization.
ipv6: remove unused macros from net/ipv6.h
ipv6: remove unused parameter from ip6_ra_control
tcp: fix kernel panic with listening_get_next
tcp: Remove redundant checks when setting eff_sacks
tcp: options clean up
tcp: Fix MD5 signatures for non-linear skbs
sctp: Update sctp global memory limit allocations.
sctp: remove unnecessary byteshifting, calculate directly in big-endian
sctp: Allow only 1 listening socket with SO_REUSEADDR
sctp: Do not leak memory on multiple listen() calls
sctp: Support ipv6only AF_INET6 sockets.
...
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 380 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 24 | ||||
-rw-r--r-- | net/core/ethtool.c | 37 | ||||
-rw-r--r-- | net/core/fib_rules.c | 2 | ||||
-rw-r--r-- | net/core/iovec.c | 2 | ||||
-rw-r--r-- | net/core/link_watch.c | 11 | ||||
-rw-r--r-- | net/core/neighbour.c | 8 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 26 | ||||
-rw-r--r-- | net/core/netpoll.c | 24 | ||||
-rw-r--r-- | net/core/pktgen.c | 71 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 26 | ||||
-rw-r--r-- | net/core/skbuff.c | 166 | ||||
-rw-r--r-- | net/core/sock.c | 8 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 39 |
14 files changed, 529 insertions, 295 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 821cb16..2eed17b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -90,6 +90,7 @@ #include <linux/if_ether.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/notifier.h> #include <linux/skbuff.h> #include <net/net_namespace.h> @@ -120,6 +121,9 @@ #include <linux/ctype.h> #include <linux/if_arp.h> #include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> #include "net-sysfs.h" @@ -257,7 +261,7 @@ DEFINE_PER_CPU(struct softnet_data, softnet_data); #ifdef CONFIG_DEBUG_LOCK_ALLOC /* - * register_netdevice() inits dev->_xmit_lock and sets lockdep class + * register_netdevice() inits txq->_xmit_lock and sets lockdep class * according to dev->type */ static const unsigned short netdev_lock_type[] = @@ -961,6 +965,12 @@ void netdev_state_change(struct net_device *dev) } } +void netdev_bonding_change(struct net_device *dev) +{ + call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev); +} +EXPORT_SYMBOL(netdev_bonding_change); + /** * dev_load - load a network module * @net: the applicable net namespace @@ -1117,6 +1127,29 @@ int dev_close(struct net_device *dev) } +/** + * dev_disable_lro - disable Large Receive Offload on a device + * @dev: device + * + * Disable Large Receive Offload (LRO) on a net device. Must be + * called under RTNL. This is needed if received packets may be + * forwarded to another interface. + */ +void dev_disable_lro(struct net_device *dev) +{ + if (dev->ethtool_ops && dev->ethtool_ops->get_flags && + dev->ethtool_ops->set_flags) { + u32 flags = dev->ethtool_ops->get_flags(dev); + if (flags & ETH_FLAG_LRO) { + flags &= ~ETH_FLAG_LRO; + dev->ethtool_ops->set_flags(dev, flags); + } + } + WARN_ON(dev->features & NETIF_F_LRO); +} +EXPORT_SYMBOL(dev_disable_lro); + + static int dev_boot_phase = 1; /* @@ -1290,16 +1323,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) } -void __netif_schedule(struct net_device *dev) +void __netif_schedule(struct Qdisc *q) { - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; + BUG_ON(q == &noop_qdisc); + + if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) { struct softnet_data *sd; + unsigned long flags; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; + q->next_sched = sd->output_queue; + sd->output_queue = q; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } @@ -1566,7 +1601,8 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) { if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1595,9 +1631,7 @@ gso: skb->next = nskb; return rc; } - if (unlikely((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) && - skb->next)) + if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); @@ -1634,9 +1668,71 @@ out_kfree_skb: * --BLG */ +static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +{ + u32 *addr, *ports, hash, ihl; + u8 ip_proto; + int alen; + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + ip_proto = ip_hdr(skb)->protocol; + addr = &ip_hdr(skb)->saddr; + ihl = ip_hdr(skb)->ihl; + alen = 2; + break; + case __constant_htons(ETH_P_IPV6): + ip_proto = ipv6_hdr(skb)->nexthdr; + addr = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + ihl = (40 >> 2); + alen = 8; + break; + default: + return 0; + } + + ports = (u32 *) (skb_network_header(skb) + (ihl * 4)); + + hash = 0; + while (alen--) + hash ^= *addr++; + + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + hash ^= *ports; + break; + + default: + break; + } + + return hash % dev->real_num_tx_queues; +} + +static struct netdev_queue *dev_pick_tx(struct net_device *dev, + struct sk_buff *skb) +{ + u16 queue_index = 0; + + if (dev->select_queue) + queue_index = dev->select_queue(dev, skb); + else if (dev->real_num_tx_queues > 1) + queue_index = simple_tx_hash(dev, skb); + + skb_set_queue_mapping(skb, queue_index); + return netdev_get_tx_queue(dev, queue_index); +} + int dev_queue_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; struct Qdisc *q; int rc = -ENOMEM; @@ -1669,44 +1765,29 @@ int dev_queue_xmit(struct sk_buff *skb) } gso: - spin_lock_prefetch(&dev->queue_lock); - /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ rcu_read_lock_bh(); - /* Updates of qdisc are serialized by queue_lock. - * The struct Qdisc which is pointed to by qdisc is now a - * rcu structure - it may be accessed without acquiring - * a lock (but the structure may be stale.) The freeing of the - * qdisc will be deferred until it's known that there are no - * more references to it. - * - * If the qdisc has an enqueue function, we still need to - * hold the queue_lock before calling it, since queue_lock - * also serializes access to the device queue. - */ + txq = dev_pick_tx(dev, skb); + q = rcu_dereference(txq->qdisc); - q = rcu_dereference(dev->qdisc); #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { - /* Grab device queue */ - spin_lock(&dev->queue_lock); - q = dev->qdisc; - if (q->enqueue) { - /* reset queue_mapping to zero */ - skb_set_queue_mapping(skb, 0); - rc = q->enqueue(skb, q); - qdisc_run(dev); - spin_unlock(&dev->queue_lock); - - rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; - goto out; - } - spin_unlock(&dev->queue_lock); + spinlock_t *root_lock = qdisc_root_lock(q); + + spin_lock(root_lock); + + rc = qdisc_enqueue_root(skb, q); + qdisc_run(q); + + spin_unlock(root_lock); + + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; + goto out; } /* The device has no queue. Common case for software devices: @@ -1724,19 +1805,18 @@ gso: if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (dev->xmit_lock_owner != cpu) { + if (txq->xmit_lock_owner != cpu) { - HARD_TX_LOCK(dev, cpu); + HARD_TX_LOCK(dev, txq, cpu); - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) { + if (!netif_tx_queue_stopped(txq)) { rc = 0; - if (!dev_hard_start_xmit(skb, dev)) { - HARD_TX_UNLOCK(dev); + if (!dev_hard_start_xmit(skb, dev, txq)) { + HARD_TX_UNLOCK(dev, txq); goto out; } } - HARD_TX_UNLOCK(dev); + HARD_TX_UNLOCK(dev, txq); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); @@ -1880,7 +1960,7 @@ static void net_tx_action(struct softirq_action *h) } if (sd->output_queue) { - struct net_device *head; + struct Qdisc *head; local_irq_disable(); head = sd->output_queue; @@ -1888,17 +1968,20 @@ static void net_tx_action(struct softirq_action *h) local_irq_enable(); while (head) { - struct net_device *dev = head; + struct Qdisc *q = head; + spinlock_t *root_lock; + head = head->next_sched; smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_SCHED, &dev->state); + clear_bit(__QDISC_STATE_SCHED, &q->state); - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - spin_unlock(&dev->queue_lock); + root_lock = qdisc_root_lock(q); + if (spin_trylock(root_lock)) { + qdisc_run(q); + spin_unlock(root_lock); } else { - netif_schedule(dev); + __netif_schedule(q); } } } @@ -1979,10 +2062,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, */ static int ing_filter(struct sk_buff *skb) { - struct Qdisc *q; struct net_device *dev = skb->dev; - int result = TC_ACT_OK; u32 ttl = G_TC_RTTL(skb->tc_verd); + struct netdev_queue *rxq; + int result = TC_ACT_OK; + struct Qdisc *q; if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING @@ -1994,10 +2078,14 @@ static int ing_filter(struct sk_buff *skb) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + rxq = &dev->rx_queue; + + q = rxq->qdisc; + if (q) { + spin_lock(qdisc_lock(q)); + result = qdisc_enqueue_root(skb, q); + spin_unlock(qdisc_lock(q)); + } return result; } @@ -2006,7 +2094,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { - if (!skb->dev->qdisc_ingress) + if (!skb->dev->rx_queue.qdisc) goto out; if (*pt_prev) { @@ -2030,6 +2118,33 @@ out: } #endif +/* + * netif_nit_deliver - deliver received packets to network taps + * @skb: buffer + * + * This function is used to deliver incoming packets to network + * taps. It should be used when the normal netif_receive_skb path + * is bypassed, for example because of VLAN acceleration. + */ +void netif_nit_deliver(struct sk_buff *skb) +{ + struct packet_type *ptype; + + if (list_empty(&ptype_all)) + return; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->mac_len = skb->network_header - skb->mac_header; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_all, list) { + if (!ptype->dev || ptype->dev == skb->dev) + deliver_skb(skb, ptype, skb->dev); + } + rcu_read_unlock(); +} + /** * netif_receive_skb - process receive buffer from network * @skb: buffer to process @@ -2769,16 +2884,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } -static void __dev_set_promiscuity(struct net_device *dev, int inc) +static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); - if ((dev->promiscuity += inc) == 0) - dev->flags &= ~IFF_PROMISC; - else - dev->flags |= IFF_PROMISC; + dev->flags |= IFF_PROMISC; + dev->promiscuity += inc; + if (dev->promiscuity == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch promisc and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_PROMISC; + else { + dev->promiscuity -= inc; + printk(KERN_WARNING "%s: promiscuity touches roof, " + "set promiscuity failed, promiscuity feature " + "of device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags != old_flags) { printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : @@ -2796,6 +2924,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_PROMISC); } + return 0; } /** @@ -2807,14 +2936,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_promiscuity(struct net_device *dev, int inc) +int dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + int err; - __dev_set_promiscuity(dev, inc); + err = __dev_set_promiscuity(dev, inc); + if (err < 0) + return err; if (dev->flags != old_flags) dev_set_rx_mode(dev); + return err; } /** @@ -2827,22 +2961,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc) * to all interfaces. Once it hits zero the device reverts back to normal * filtering operation. A negative @inc value is used to drop the counter * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. */ -void dev_set_allmulti(struct net_device *dev, int inc) +int dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; ASSERT_RTNL(); dev->flags |= IFF_ALLMULTI; - if ((dev->allmulti += inc) == 0) - dev->flags &= ~IFF_ALLMULTI; + dev->allmulti += inc; + if (dev->allmulti == 0) { + /* + * Avoid overflow. + * If inc causes overflow, untouch allmulti and return error. + */ + if (inc < 0) + dev->flags &= ~IFF_ALLMULTI; + else { + dev->allmulti -= inc; + printk(KERN_WARNING "%s: allmulti touches roof, " + "set allmulti failed, allmulti feature of " + "device might be broken.\n", dev->name); + return -EOVERFLOW; + } + } if (dev->flags ^ old_flags) { if (dev->change_rx_flags) dev->change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } + return 0; } /* @@ -2881,9 +3031,9 @@ void __dev_set_rx_mode(struct net_device *dev) void dev_set_rx_mode(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } int __dev_addr_delete(struct dev_addr_list **list, int *count, @@ -2961,11 +3111,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_delete); @@ -2987,11 +3137,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) ASSERT_RTNL(); - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(dev_unicast_add); @@ -3058,12 +3208,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } EXPORT_SYMBOL(dev_unicast_sync); @@ -3079,15 +3229,15 @@ EXPORT_SYMBOL(dev_unicast_sync); */ void dev_unicast_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->uc_list, &to->uc_count, &from->uc_list, &from->uc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -3107,7 +3257,7 @@ static void __dev_addr_discard(struct dev_addr_list **list) static void dev_addr_discard(struct net_device *dev) { - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); __dev_addr_discard(&dev->uc_list); dev->uc_count = 0; @@ -3115,7 +3265,7 @@ static void dev_addr_discard(struct net_device *dev) __dev_addr_discard(&dev->mc_list); dev->mc_count = 0; - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); } unsigned dev_get_flags(const struct net_device *dev) @@ -3688,6 +3838,21 @@ static void rollback_registered(struct net_device *dev) dev_put(dev); } +static void __netdev_init_queue_locks_one(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + spin_lock_init(&dev_queue->_xmit_lock); + netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type); + dev_queue->xmit_lock_owner = -1; +} + +static void netdev_init_queue_locks(struct net_device *dev) +{ + netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL); + __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); +} + /** * register_netdevice - register a network device * @dev: device to register @@ -3722,11 +3887,8 @@ int register_netdevice(struct net_device *dev) BUG_ON(!dev_net(dev)); net = dev_net(dev); - spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->_xmit_lock); - netdev_set_lockdep_class(&dev->_xmit_lock, dev->type); - dev->xmit_lock_owner = -1; - spin_lock_init(&dev->ingress_lock); + spin_lock_init(&dev->addr_list_lock); + netdev_init_queue_locks(dev); dev->iflink = -1; @@ -4007,6 +4169,19 @@ static struct net_device_stats *internal_stats(struct net_device *dev) return &dev->stats; } +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, + void *_unused) +{ + queue->dev = dev; +} + +static void netdev_init_queues(struct net_device *dev) +{ + netdev_init_one_queue(dev, &dev->rx_queue, NULL); + netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); +} + /** * alloc_netdev_mq - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -4021,14 +4196,14 @@ static struct net_device_stats *internal_stats(struct net_device *dev) struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, void (*setup)(struct net_device *), unsigned int queue_count) { - void *p; + struct netdev_queue *tx; struct net_device *dev; int alloc_size; + void *p; BUG_ON(strlen(name) >= sizeof(dev->name)); - alloc_size = sizeof(struct net_device) + - sizeof(struct net_device_subqueue) * (queue_count - 1); + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; @@ -4043,22 +4218,33 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, return NULL; } + tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL); + if (!tx) { + printk(KERN_ERR "alloc_netdev: Unable to allocate " + "tx qdiscs.\n"); + kfree(p); + return NULL; + } + dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; dev_net_set(dev, &init_net); + dev->_tx = tx; + dev->num_tx_queues = queue_count; + dev->real_num_tx_queues = queue_count; + if (sizeof_priv) { dev->priv = ((char *)dev + - ((sizeof(struct net_device) + - (sizeof(struct net_device_subqueue) * - (queue_count - 1)) + NETDEV_ALIGN_CONST) + ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST)); } - dev->egress_subqueue_count = queue_count; dev->gso_max_size = GSO_MAX_SIZE; + netdev_init_queues(dev); + dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); @@ -4079,6 +4265,8 @@ void free_netdev(struct net_device *dev) { release_net(dev_net(dev)); + kfree(dev->_tx); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4260,7 +4448,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, void *ocpu) { struct sk_buff **list_skb; - struct net_device **list_net; + struct Qdisc **list_net; struct sk_buff *skb; unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index f8a3455..5402b3b 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -72,7 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) { @@ -83,7 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) __dev_set_rx_mode(dev); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -95,11 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) { int err; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); if (!err) __dev_set_rx_mode(dev); - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return err; } @@ -119,12 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) { int err = 0; - netif_tx_lock_bh(to); + netif_addr_lock_bh(to); err = __dev_addr_sync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); if (!err) __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); + netif_addr_unlock_bh(to); return err; } @@ -143,15 +143,15 @@ EXPORT_SYMBOL(dev_mc_sync); */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { - netif_tx_lock_bh(from); - netif_tx_lock_bh(to); + netif_addr_lock_bh(from); + netif_addr_lock(to); __dev_addr_unsync(&to->mc_list, &to->mc_count, &from->mc_list, &from->mc_count); __dev_set_rx_mode(to); - netif_tx_unlock_bh(to); - netif_tx_unlock_bh(from); + netif_addr_unlock(to); + netif_addr_unlock_bh(from); } EXPORT_SYMBOL(dev_mc_unsync); @@ -164,7 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) return 0; - netif_tx_lock_bh(dev); + netif_addr_lock_bh(dev); for (m = dev->mc_list; m; m = m->next) { int i; @@ -176,7 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); } - netif_tx_unlock_bh(dev); + netif_addr_unlock_bh(dev); return 0; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0133b5e..14ada53 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } +static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc cmd; + + if (!dev->ethtool_ops->set_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + return dev->ethtool_ops->set_rxhash(dev, &cmd); +} + +static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc info; + + if (!dev->ethtool_ops->get_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + dev->ethtool_ops->get_rxhash(dev, &info); + + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; +} + static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; @@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GGSO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: + case ETHTOOL_GRXFH: break; default: if (!capable(CAP_NET_ADMIN)) @@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_set_value(dev, useraddr, dev->ethtool_ops->set_priv_flags); break; + case ETHTOOL_GRXFH: + rc = ethtool_get_rxhash(dev, useraddr); + break; + case ETHTOOL_SRXFH: + rc = ethtool_set_rxhash(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 277a230..79de3b1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops) static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) - ops->flush_cache(); + ops->flush_cache(ops); } int fib_rules_register(struct fib_rules_ops *ops) diff --git a/net/core/iovec.c b/net/core/iovec.c index 755c37f..4c9c012 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -36,7 +36,7 @@ * in any case. */ -int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode) +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { int size, err, ct; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a5e372b..bf8f7af 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -77,10 +77,10 @@ static void rfc2863_policy(struct net_device *dev) } -static int linkwatch_urgent_event(struct net_device *dev) +static bool linkwatch_urgent_event(struct net_device *dev) { return netif_running(dev) && netif_carrier_ok(dev) && - dev->qdisc != dev->qdisc_sleeping; + qdisc_tx_changing(dev); } @@ -180,10 +180,9 @@ static void __linkwatch_run_queue(int urgent_only) rfc2863_policy(dev); if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) { - WARN_ON(dev->qdisc_sleeping == &noop_qdisc); + if (netif_carrier_ok(dev)) dev_activate(dev); - } else + else dev_deactivate(dev); netdev_state_change(dev); @@ -214,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy) void linkwatch_fire_event(struct net_device *dev) { - int urgent = linkwatch_urgent_event(dev); + bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 65f01f7..f62c8af 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -930,6 +930,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) buff = neigh->arp_queue.next; __skb_unlink(buff, &neigh->arp_queue); kfree_skb(buff); + NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } __skb_queue_tail(&neigh->arp_queue, skb); } @@ -2462,12 +2463,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n"); + seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx\n", + "%08lx %08lx %08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, @@ -2483,7 +2484,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) st->rcv_probes_ucast, st->periodic_gc_runs, - st->forced_gc_runs + st->forced_gc_runs, + st->unres_discards ); return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 90e2177..c1f4e0d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d, offset % sizeof(unsigned long) != 0); read_lock(&dev_base_lock); - if (dev_isalive(dev) && dev->get_stats && - (stats = (*dev->get_stats)(dev))) + if (dev_isalive(dev)) { + stats = dev->get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); - + } read_unlock(&dev_base_lock); return ret; } @@ -318,7 +318,7 @@ static struct attribute_group netstat_group = { .attrs = netstat_attrs, }; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS /* helper function that does all the locking etc for wireless stats */ static ssize_t wireless_show(struct device *d, char *buf, ssize_t (*format)(const struct iw_statistics *, @@ -457,10 +457,9 @@ int netdev_register_kobject(struct net_device *net) strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); #ifdef CONFIG_SYSFS - if (net->get_stats) - *groups++ = &netstat_group; + *groups++ = &netstat_group; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WIRELESS_EXT_SYSFS if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif @@ -469,6 +468,19 @@ int netdev_register_kobject(struct net_device *net) return device_add(dev); } +int netdev_class_create_file(struct class_attribute *class_attr) +{ + return class_create_file(&net_class, class_attr); +} + +void netdev_class_remove_file(struct class_attribute *class_attr) +{ + class_remove_file(&net_class, class_attr); +} + +EXPORT_SYMBOL(netdev_class_create_file); +EXPORT_SYMBOL(netdev_class_remove_file); + void netdev_initialize_kobject(struct net_device *net) { struct device *device = &(net->dev); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8fb134d..c127208 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,25 +58,27 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { __kfree_skb(skb); continue; } + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); - netif_tx_lock(dev); - if ((netif_queue_stopped(dev) || - netif_subqueue_stopped(dev, skb)) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + __netif_tx_lock(txq, smp_processor_id()); + if (netif_tx_queue_stopped(txq) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } - netif_tx_unlock(dev); + __netif_tx_unlock(txq); local_irq_restore(flags); } } @@ -278,17 +280,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { + struct netdev_queue *txq; unsigned long flags; + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + local_irq_save(flags); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (netif_tx_trylock(dev)) { - if (!netif_queue_stopped(dev) && - !netif_subqueue_stopped(dev, skb)) + if (__netif_tx_trylock(txq)) { + if (!netif_tx_queue_stopped(txq)) status = dev->hard_start_xmit(skb, dev); - netif_tx_unlock(dev); + __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) break; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fdf5377..c7d484f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1875,7 +1875,7 @@ static int pktgen_device_event(struct notifier_block *unused, { struct net_device *dev = ptr; - if (dev_net(dev) != &init_net) + if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; /* It is OK that we do not hold the group lock right now, @@ -2123,6 +2123,24 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) } } #endif +static void set_cur_queue_map(struct pktgen_dev *pkt_dev) +{ + if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - + pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } +} + /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ @@ -2325,19 +2343,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->cur_pkt_size = t; } - if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { - __u16 t; - if (pkt_dev->flags & F_QUEUE_MAP_RND) { - t = random32() % - (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) - + pkt_dev->queue_map_min; - } else { - t = pkt_dev->cur_queue_map + 1; - if (t > pkt_dev->queue_map_max) - t = pkt_dev->queue_map_min; - } - pkt_dev->cur_queue_map = t; - } + set_cur_queue_map(pkt_dev); pkt_dev->flows[flow].count++; } @@ -2458,7 +2464,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ - + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2469,6 +2475,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; @@ -2507,7 +2514,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2797,6 +2804,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ + u16 queue_map; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2807,6 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, /* Update any of the values, used when we're incrementing various * fields. */ + queue_map = pkt_dev->cur_queue_map; mod_cur_headers(pkt_dev); skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + @@ -2844,7 +2853,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); - skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); + skb_set_queue_mapping(skb, queue_map); iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3263,7 +3272,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = NULL; + struct netdev_queue *txq; __u64 idle_start = 0; + u16 queue_map; int ret; odev = pkt_dev->odev; @@ -3285,9 +3296,15 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - if ((netif_queue_stopped(odev) || - (pkt_dev->skb && - netif_subqueue_stopped(odev, pkt_dev->skb))) || + if (!pkt_dev->skb) { + set_cur_queue_map(pkt_dev); + queue_map = pkt_dev->cur_queue_map; + } else { + queue_map = skb_get_queue_mapping(pkt_dev->skb); + } + + txq = netdev_get_tx_queue(odev, queue_map); + if (netif_tx_queue_stopped(txq) || need_resched()) { idle_start = getCurUs(); @@ -3303,8 +3320,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_queue_stopped(odev) || - netif_subqueue_stopped(odev, pkt_dev->skb)) { + if (netif_tx_queue_stopped(txq)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3331,9 +3347,12 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - netif_tx_lock_bh(odev); - if (!netif_queue_stopped(odev) && - !netif_subqueue_stopped(odev, pkt_dev->skb)) { + /* fill_packet() might have changed the queue */ + queue_map = skb_get_queue_mapping(pkt_dev->skb); + txq = netdev_get_tx_queue(odev, queue_map); + + __netif_tx_lock_bh(txq); + if (!netif_tx_queue_stopped(txq)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: @@ -3377,7 +3396,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->next_tx_ns = 0; } - netif_tx_unlock_bh(odev); + __netif_tx_unlock_bh(txq); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a9a7721..71edb8b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -605,8 +605,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) { + struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; + struct net_device_stats *stats; + struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -633,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - if (dev->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); + txq = netdev_get_tx_queue(dev, 0); + if (txq->qdisc_sleeping) + NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); if (1) { struct rtnl_link_ifmap map = { @@ -653,19 +657,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } - if (dev->get_stats) { - struct net_device_stats *stats = dev->get_stats(dev); - if (stats) { - struct nlattr *attr; + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); + if (attr == NULL) + goto nla_put_failure; - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) - goto nla_put_failure; - - copy_rtnl_link_stats(nla_data(attr), stats); - } - } + stats = dev->get_stats(dev); + copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3666216..e411567 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4,8 +4,6 @@ * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ - * * Fixes: * Alan Cox : Fixed the worst of the load * balancer bugs. @@ -461,6 +459,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->tc_verd = old->tc_verd; #endif #endif + new->vlan_tci = old->vlan_tci; + skb_copy_secmark(new, old); } @@ -1282,114 +1282,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, return 0; } -/* - * Map linear and fragment data from the skb to spd. Returns number of - * pages mapped. - */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *total_len, - struct splice_pipe_desc *spd) -{ - unsigned int nr_pages = spd->nr_pages; - unsigned int poff, plen, len, toff, tlen; - int headlen, seg, error = 0; - - toff = *offset; - tlen = *total_len; - if (!tlen) { - error = 1; - goto err; +static inline void __segment_seek(struct page **page, unsigned int *poff, + unsigned int *plen, unsigned int off) +{ + *poff += off; + *page += *poff / PAGE_SIZE; + *poff = *poff % PAGE_SIZE; + *plen -= off; +} + +static inline int __splice_segment(struct page *page, unsigned int poff, + unsigned int plen, unsigned int *off, + unsigned int *len, struct sk_buff *skb, + struct splice_pipe_desc *spd) +{ + if (!*len) + return 1; + + /* skip this segment if already processed */ + if (*off >= plen) { + *off -= plen; + return 0; } - /* - * if the offset is greater than the linear part, go directly to - * the fragments. - */ - headlen = skb_headlen(skb); - if (toff >= headlen) { - toff -= headlen; - goto map_frag; + /* ignore any bits we already processed */ + if (*off) { + __segment_seek(&page, &poff, &plen, *off); + *off = 0; } - /* - * first map the linear region into the pages/partial map, skipping - * any potential initial offset. - */ - len = 0; - while (len < headlen) { - void *p = skb->data + len; - - poff = (unsigned long) p & (PAGE_SIZE - 1); - plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff); - len += plen; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } + do { + unsigned int flen = min(*len, plen); - plen = min(plen, tlen); - if (!plen) - break; + /* the linear region may spread across several pages */ + flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - /* - * just jump directly to update and return, no point - * in going over fragments when the output is full. - */ - error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb); - if (error) - goto done; + if (spd_fill_page(spd, page, flen, poff, skb)) + return 1; - tlen -= plen; - } + __segment_seek(&page, &poff, &plen, flen); + *len -= flen; + + } while (*len && plen); + + return 0; +} + +/* + * Map linear and fragment data from the skb to spd. It reports failure if the + * pipe is full or if we already spliced the requested length. + */ +static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, + unsigned int *len, + struct splice_pipe_desc *spd) +{ + int seg; + + /* + * map the linear part + */ + if (__splice_segment(virt_to_page(skb->data), + (unsigned long) skb->data & (PAGE_SIZE - 1), + skb_headlen(skb), + offset, len, skb, spd)) + return 1; /* * then map the fragments */ -map_frag: for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - plen = f->size; - poff = f->page_offset; - - if (toff) { - if (plen <= toff) { - toff -= plen; - continue; - } - plen -= toff; - poff += toff; - toff = 0; - } - - plen = min(plen, tlen); - if (!plen) - break; - - error = spd_fill_page(spd, f->page, plen, poff, skb); - if (error) - break; - - tlen -= plen; + if (__splice_segment(f->page, f->page_offset, f->size, + offset, len, skb, spd)) + return 1; } -done: - if (spd->nr_pages - nr_pages) { - *offset = 0; - *total_len = tlen; - return 0; - } -err: - /* update the offset to reflect the linear part skip, if any */ - if (!error) - *offset = toff; - return error; + return 0; } /* @@ -2288,6 +2257,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) skb_copy_queue_mapping(nskb, skb); nskb->priority = skb->priority; nskb->protocol = skb->protocol; + nskb->vlan_tci = skb->vlan_tci; nskb->dst = dst_clone(skb->dst); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); nskb->pkt_type = skb->pkt_type; @@ -2592,6 +2562,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) return true; } +void __skb_warn_lro_forwarding(const struct sk_buff *skb) +{ + if (net_ratelimit()) + pr_warning("%s: received packets cannot be forwarded" + " while LRO is enabled\n", skb->dev->name); +} + EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); @@ -2625,6 +2602,7 @@ EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); EXPORT_SYMBOL(skb_append_datato_frags); +EXPORT_SYMBOL(__skb_warn_lro_forwarding); EXPORT_SYMBOL_GPL(skb_to_sgvec); EXPORT_SYMBOL_GPL(skb_cow_data); diff --git a/net/core/sock.c b/net/core/sock.c index 88094cb..10a64d5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -7,8 +7,6 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> @@ -1068,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) * to be taken into account in all callers. -acme */ sk_refcnt_debug_inc(newsk); - newsk->sk_socket = NULL; + sk_set_socket(newsk, NULL); newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) @@ -1444,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) /* Under pressure. */ if (allocated > prot->sysctl_mem[1]) if (prot->enter_memory_pressure) - prot->enter_memory_pressure(); + prot->enter_memory_pressure(sk); /* Over hard limit. */ if (allocated > prot->sysctl_mem[2]) @@ -1704,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; sk->sk_state = TCP_CLOSE; - sk->sk_socket = sock; + sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5fc8010..a570e2a 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = { #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_SOMAXCONN, - .procname = "somaxconn", - .data = &init_net.core.sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, @@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = { { .ctl_name = 0 } }; +static struct ctl_table netns_core_table[] = { + { + .ctl_name = NET_CORE_SOMAXCONN, + .procname = "somaxconn", + .data = &init_net.core.sysctl_somaxconn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = 0 } +}; + static __net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, @@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = { static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl, *tmp; + struct ctl_table *tbl; net->core.sysctl_somaxconn = SOMAXCONN; - tbl = net_core_table; + tbl = netns_core_table; if (net != &init_net) { - tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); + tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; - for (tmp = tbl; tmp->procname; tmp++) { - if (tmp->data >= (void *)&init_net && - tmp->data < (void *)(&init_net + 1)) - tmp->data += (char *)net - (char *)&init_net; - else - tmp->mode &= ~0222; - } + tbl[0].data = &net->core.sysctl_somaxconn; } net->core.sysctl_hdr = register_net_sysctl_table(net, @@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net) return 0; err_reg: - if (tbl != net_core_table) + if (tbl != netns_core_table) kfree(tbl); err_dup: return -ENOMEM; @@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) tbl = net->core.sysctl_hdr->ctl_table_arg; unregister_net_sysctl_table(net->core.sysctl_hdr); - BUG_ON(tbl == net_core_table); + BUG_ON(tbl == netns_core_table); kfree(tbl); } @@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } |