summary | refs | log | tree | commit | diff | stats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/Makefile 2
-rw-r--r-- net/core/dev.c 1006
-rw-r--r-- net/core/dev_addr_lists.c 741
-rw-r--r-- net/core/dev_mcast.c 232
-rw-r--r-- net/core/dst.c 41
-rw-r--r-- net/core/ethtool.c 148
-rw-r--r-- net/core/fib_rules.c 10
-rw-r--r-- net/core/flow.c 405
-rw-r--r-- net/core/net-sysfs.c 230
-rw-r--r-- net/core/pktgen.c 58
-rw-r--r-- net/core/rtnetlink.c 44
-rw-r--r-- net/core/skbuff.c 2
12 files changed, 1772 insertions, 1147 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac..51c3eec 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
+obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_XFRM) += flow.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 1c8a0ce..a10a216 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
+#include <linux/pci.h>
#include "net-sysfs.h"
@@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
+static inline void rps_lock(struct softnet_data *queue)
+{ /*
+ * Acquire the spinlock embedded in @queue's input_pkt_queue.  A plain
+ * spin_lock() is used, so the IRQ state is left untouched; the callers
+ * visible in this patch (enqueue_to_backlog(), process_backlog()) disable
+ * local interrupts before calling.  Without CONFIG_RPS the body is empty
+ * and the call is a no-op.
+ */
+#ifdef CONFIG_RPS
+ spin_lock(&queue->input_pkt_queue.lock);
+#endif
+}
+
+static inline void rps_unlock(struct softnet_data *queue)
+{ /*
+ * Release the spinlock embedded in @queue's input_pkt_queue; counterpart
+ * of rps_lock().  Uses plain spin_unlock(), so the IRQ state is not
+ * restored here.  Without CONFIG_RPS the body is empty and the call is a
+ * no-op.
+ */
+#ifdef CONFIG_RPS
+ spin_unlock(&queue->input_pkt_queue.lock);
+#endif
+}
+
/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
@@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
- struct net_device *dev;
+ struct net_device *dev, *ret = NULL;
- rtnl_lock();
- dev = __dev_getfirstbyhwtype(net, type);
- if (dev)
- dev_hold(dev);
- rtnl_unlock();
- return dev;
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev)
+ if (dev->type == type) {
+ dev_hold(dev);
+ ret = dev;
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);
@@ -1085,9 +1103,9 @@ void netdev_state_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_state_change);
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
{
- call_netdevice_notifiers(event, dev);
+ return call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);
@@ -1784,18 +1802,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
-static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
+ if (!(dev->features & NETIF_F_HIGHDMA)) {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+ return 1;
+ }
- if (dev->features & NETIF_F_HIGHDMA)
- return 0;
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- if (PageHighMem(skb_shinfo(skb)->frags[i].page))
- return 1;
+ if (PCI_DMA_BUS_IS_PHYS) {
+ struct device *pdev = dev->dev.parent;
+ if (!pdev)
+ return 0;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+ if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
+ return 1;
+ }
+ }
#endif
return 0;
}
@@ -1932,7 +1959,7 @@ out_kfree_skb:
return rc;
}
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
@@ -1950,7 +1977,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
else
hash = skb->protocol;
- hash = jhash_1word(hash, skb_tx_hashrnd);
+ hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
@@ -1960,10 +1987,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
if (unlikely(queue_index >= dev->real_num_tx_queues)) {
if (net_ratelimit()) {
- WARN(1, "%s selects TX queue %d, but "
+ netdev_warn(dev, "selects TX queue %d, but "
"real number of TX queues is %d\n",
- dev->name, queue_index,
- dev->real_num_tx_queues);
+ queue_index, dev->real_num_tx_queues);
}
return 0;
}
@@ -2176,6 +2202,178 @@ int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+#ifdef CONFIG_RPS
+/*
+ * get_rps_cpu is called from netif_receive_skb and netif_rx and returns the
+ * target CPU from the RPS map of the receiving queue for a given skb.
+ *
+ * @dev: device the skb was received on; supplies the _rx queue array.
+ * @skb: packet to steer; skb->rxhash is filled in here if it was zero.
+ *
+ * Returns the selected CPU number, or -1 when no steering applies:
+ *  - the rx queue index recorded in the skb is >= dev->num_rx_queues,
+ *  - the rx queue has no rps_map,
+ *  - skb->protocol is neither ETH_P_IP nor ETH_P_IPV6,
+ *  - the IP/IPv6 header cannot be pulled into the linear area,
+ *  - the CPU picked from the map is not online.
+ *
+ * Hashing: a non-zero skb->rxhash is reused as is.  Otherwise the hash is
+ * jhash_3words() over two 32-bit address words (IPv4 saddr/daddr, or the
+ * last 32 bits of each IPv6 address), the first four bytes following the
+ * IP header for the protocols listed below (0 for any other protocol or if
+ * those bytes cannot be pulled), seeded with hashrnd.  A computed hash of
+ * 0 is stored as 1, so that 0 keeps meaning "no hash yet".
+ *
+ * The whole lookup runs inside rcu_read_lock()/rcu_read_unlock().
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip;
+ struct netdev_rx_queue *rxqueue;
+ struct rps_map *map;
+ int cpu = -1;
+ u8 ip_proto;
+ u32 addr1, addr2, ports, ihl;
+
+ rcu_read_lock();
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+ if (unlikely(index >= dev->num_rx_queues)) {
+ if (net_ratelimit()) {
+ netdev_warn(dev, "received packet on queue "
+ "%u, but number of RX queues is %u\n",
+ index, dev->num_rx_queues);
+ }
+ goto done;
+ }
+ rxqueue = dev->_rx + index;
+ } else
+ rxqueue = dev->_rx; /* no queue recorded: fall back to the first rx queue */
+
+ if (!rxqueue->rps_map)
+ goto done;
+
+ if (skb->rxhash)
+ goto got_hash; /* Skip hash computation on packet header */
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ if (!pskb_may_pull(skb, sizeof(*ip)))
+ goto done;
+
+ ip = (struct iphdr *) skb->data;
+ ip_proto = ip->protocol;
+ addr1 = ip->saddr;
+ addr2 = ip->daddr;
+ ihl = ip->ihl; /* header length in 32-bit words; multiplied by 4 below */
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ if (!pskb_may_pull(skb, sizeof(*ip6)))
+ goto done;
+
+ ip6 = (struct ipv6hdr *) skb->data;
+ ip_proto = ip6->nexthdr; /* taken directly; extension headers are not walked */
+ addr1 = ip6->saddr.s6_addr32[3];
+ addr2 = ip6->daddr.s6_addr32[3];
+ ihl = (40 >> 2); /* 40-byte header expressed in 32-bit words */
+ break;
+ default:
+ goto done;
+ }
+ ports = 0;
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_AH:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ if (pskb_may_pull(skb, (ihl * 4) + 4))
+ ports = *((u32 *) (skb->data + (ihl * 4))); /* first 4 bytes after the IP header */
+ break;
+
+ default:
+ break;
+ }
+
+ skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+ if (!skb->rxhash)
+ skb->rxhash = 1; /* 0 is reserved for "hash not computed" (see test above) */
+
+got_hash:
+ map = rcu_dereference(rxqueue->rps_map);
+ if (map) {
+ u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; /* scale the hash to an index below map->len; assumes rxhash is 32 bits wide */
+
+ if (cpu_online(tcpu)) {
+ cpu = tcpu;
+ goto done;
+ }
+ }
+
+done:
+ rcu_read_unlock();
+ return cpu;
+}
+
+/*
+ * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
+ * to be sent to kick remote softirq processing. There are two masks since
+ * the sending of IPIs must be done with interrupts enabled. The select field
+ * indicates the current mask that enqueue_to_backlog uses to schedule IPIs.
+ * select is flipped in net_rx_action before local interrupts are re-enabled
+ * and before net_rps_action is called; net_rps_action then uses the
+ * non-selected mask to send the IPIs and clears it without conflicting with
+ * enqueue_to_backlog operation.
+ */
+struct rps_remote_softirq_cpus {
+ cpumask_t mask[2]; /* [select]: being filled; [select ^ 1]: being drained */
+ int select; /* index (0 or 1) of the mask currently being filled */
+};
+static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
+
+/*
+ * Called from hardirq (IPI) context.
+ *
+ * @data is the softnet_data that net_dev_init() stored in csd.info.  The
+ * handler schedules that queue's backlog NAPI and increments the
+ * received_rps counter of the CPU it is running on.
+ */
+static void trigger_softirq(void *data)
+{
+ struct softnet_data *queue = data;
+ __napi_schedule(&queue->backlog);
+ __get_cpu_var(netdev_rx_stat).received_rps++;
+}
+#endif /* CONFIG_RPS */
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ *
+ * @skb: packet to queue; it is consumed on every path (queued on success,
+ *       freed with kfree_skb() on drop).
+ * @cpu: CPU whose softnet_data backlog receives the packet.
+ *
+ * Returns NET_RX_SUCCESS if the skb was queued, or NET_RX_DROP if the
+ * target queue length already exceeded netdev_max_backlog.
+ *
+ * Local interrupts are disabled for the duration and, with CONFIG_RPS, the
+ * target queue's lock is held while the queue is examined and modified.
+ * The total/dropped statistics are accounted to the CPU executing this
+ * function, not to @cpu.
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+{
+ struct softnet_data *queue;
+ unsigned long flags;
+
+ queue = &per_cpu(softnet_data, cpu);
+
+ local_irq_save(flags);
+ __get_cpu_var(netdev_rx_stat).total++;
+
+ rps_lock(queue);
+ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+ if (queue->input_pkt_queue.qlen) { /* non-empty: presumably the backlog NAPI is already scheduled */
+enqueue:
+ __skb_queue_tail(&queue->input_pkt_queue, skb);
+ rps_unlock(queue);
+ local_irq_restore(flags);
+ return NET_RX_SUCCESS;
+ }
+
+ /*
+ * Schedule NAPI for backlog device.  For a remote CPU the request is
+ * only recorded in the currently selected IPI mask and NET_RX_SOFTIRQ
+ * is raised locally; the IPI itself is sent later by net_rps_action().
+ */
+ if (napi_schedule_prep(&queue->backlog)) {
+#ifdef CONFIG_RPS
+ if (cpu != smp_processor_id()) {
+ struct rps_remote_softirq_cpus *rcpus =
+ &__get_cpu_var(rps_remote_softirq_cpus);
+
+ cpu_set(cpu, rcpus->mask[rcpus->select]);
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ } else
+ __napi_schedule(&queue->backlog);
+#else
+ __napi_schedule(&queue->backlog);
+#endif
+ }
+ goto enqueue;
+ }
+
+ rps_unlock(queue); /* queue over the limit: drop path */
+
+ __get_cpu_var(netdev_rx_stat).dropped++;
+ local_irq_restore(flags);
+
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
/**
* netif_rx - post buffer to the network code
@@ -2194,8 +2392,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
int netif_rx(struct sk_buff *skb)
{
- struct softnet_data *queue;
- unsigned long flags;
+ int cpu;
/* if netpoll wants it, pretend we never saw it */
if (netpoll_rx(skb))
@@ -2204,31 +2401,15 @@ int netif_rx(struct sk_buff *skb)
if (!skb->tstamp.tv64)
net_timestamp(skb);
- /*
- * The code is rearranged so that the path is the most
- * short when CPU is congested, but is still operating.
- */
- local_irq_save(flags);
- queue = &__get_cpu_var(softnet_data);
-
- __get_cpu_var(netdev_rx_stat).total++;
- if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
- if (queue->input_pkt_queue.qlen) {
-enqueue:
- __skb_queue_tail(&queue->input_pkt_queue, skb);
- local_irq_restore(flags);
- return NET_RX_SUCCESS;
- }
-
- napi_schedule(&queue->backlog);
- goto enqueue;
- }
-
- __get_cpu_var(netdev_rx_stat).dropped++;
- local_irq_restore(flags);
+#ifdef CONFIG_RPS
+ cpu = get_rps_cpu(skb->dev, skb);
+ if (cpu < 0)
+ cpu = smp_processor_id();
+#else
+ cpu = smp_processor_id();
+#endif
- kfree_skb(skb);
- return NET_RX_DROP;
+ return enqueue_to_backlog(skb, cpu);
}
EXPORT_SYMBOL(netif_rx);
@@ -2465,22 +2646,7 @@ void netif_nit_deliver(struct sk_buff *skb)
rcu_read_unlock();
}
-/**
- * netif_receive_skb - process receive buffer from network
- * @skb: buffer to process
- *
- * netif_receive_skb() is the main receive data processing function.
- * It always succeeds. The buffer may be dropped during processing
- * for congestion control or by the protocol layers.
- *
- * This function may only be called from softirq context and interrupts
- * should be enabled.
- *
- * Return values (usually ignored):
- * NET_RX_SUCCESS: no congestion
- * NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
@@ -2591,6 +2757,37 @@ out:
rcu_read_unlock();
return ret;
}
+
+/**
+ * netif_receive_skb - process receive buffer from network
+ * @skb: buffer to process
+ *
+ * netif_receive_skb() is the main receive data processing function.
+ * It always succeeds. The buffer may be dropped during processing
+ * for congestion control or by the protocol layers.
+ *
+ * With CONFIG_RPS, get_rps_cpu() is consulted first: a non-negative
+ * result queues the buffer on that CPU's backlog via
+ * enqueue_to_backlog(); a negative result, or a build without
+ * CONFIG_RPS, hands the buffer straight to __netif_receive_skb().
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+#ifdef CONFIG_RPS
+ int cpu;
+
+ cpu = get_rps_cpu(skb->dev, skb);
+
+ if (cpu < 0)
+ return __netif_receive_skb(skb);
+ else
+ return enqueue_to_backlog(skb, cpu);
+#else
+ return __netif_receive_skb(skb);
+#endif
+}
EXPORT_SYMBOL(netif_receive_skb);
/* Network device is going away, flush any packets still pending */
@@ -2600,11 +2797,13 @@ static void flush_backlog(void *arg)
struct softnet_data *queue = &__get_cpu_var(softnet_data);
struct sk_buff *skb, *tmp;
+ rps_lock(queue);
skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
if (skb->dev == dev) {
__skb_unlink(skb, &queue->input_pkt_queue);
kfree_skb(skb);
}
+ rps_unlock(queue);
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -2918,15 +3117,18 @@ static int process_backlog(struct napi_struct *napi, int quota)
struct sk_buff *skb;
local_irq_disable();
+ rps_lock(queue);
skb = __skb_dequeue(&queue->input_pkt_queue);
if (!skb) {
__napi_complete(napi);
+ rps_unlock(queue);
local_irq_enable();
break;
}
+ rps_unlock(queue);
local_irq_enable();
- netif_receive_skb(skb);
+ __netif_receive_skb(skb);
} while (++work < quota && jiffies == start_time);
return work;
@@ -3015,6 +3217,24 @@ void netif_napi_del(struct napi_struct *napi)
}
EXPORT_SYMBOL(netif_napi_del);
+#ifdef CONFIG_RPS
+/*
+ * net_rps_action sends any pending IPIs for rps. This is only called from
+ * softirq and interrupts must be enabled.
+ *
+ * @mask: set of CPUs that need a kick; net_rx_action passes the mask that
+ *        was selected before it flipped rps_remote_softirq_cpus.select.
+ *        The mask is cleared before returning.
+ *
+ * CPUs in the mask that are not online are skipped.
+ */
+static void net_rps_action(cpumask_t *mask)
+{
+ int cpu;
+
+ /* Send pending IPI's to kick RPS processing on remote cpus. */
+ for_each_cpu_mask_nr(cpu, *mask) {
+ struct softnet_data *queue = &per_cpu(softnet_data, cpu);
+ if (cpu_online(cpu))
+ __smp_call_function_single(cpu, &queue->csd, 0); /* csd.func is trigger_softirq; last arg presumably "don't wait" - confirm */
+ }
+ cpus_clear(*mask);
+}
+#endif
static void net_rx_action(struct softirq_action *h)
{
@@ -3022,6 +3242,10 @@ static void net_rx_action(struct softirq_action *h)
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
+#ifdef CONFIG_RPS
+ int select;
+ struct rps_remote_softirq_cpus *rcpus;
+#endif
local_irq_disable();
@@ -3084,7 +3308,17 @@ static void net_rx_action(struct softirq_action *h)
netpoll_poll_unlock(have);
}
out:
+#ifdef CONFIG_RPS
+ rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
+ select = rcpus->select;
+ rcpus->select ^= 1;
+
+ local_irq_enable();
+
+ net_rps_action(&rcpus->mask[select]);
+#else
local_irq_enable();
+#endif
#ifdef CONFIG_NET_DMA
/*
@@ -3330,10 +3564,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
{
struct netif_rx_stats *s = v;
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
s->total, s->dropped, s->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
- s->cpu_collision);
+ s->cpu_collision, s->received_rps);
return 0;
}
@@ -3556,11 +3790,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
slave->master = master;
- synchronize_net();
-
- if (old)
+ if (old) {
+ synchronize_net();
dev_put(old);
-
+ }
if (master)
slave->flags |= IFF_SLAVE;
else
@@ -3737,562 +3970,6 @@ void dev_set_rx_mode(struct net_device *dev)
netif_addr_unlock_bh(dev);
}
-/* hw addresses list handling functions */
-
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
- int alloc_size;
-
- if (addr_len > MAX_ADDR_LEN)
- return -EINVAL;
-
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- ha->type == addr_type) {
- ha->refcount++;
- return 0;
- }
- }
-
-
- alloc_size = sizeof(*ha);
- if (alloc_size < L1_CACHE_BYTES)
- alloc_size = L1_CACHE_BYTES;
- ha = kmalloc(alloc_size, GFP_ATOMIC);
- if (!ha)
- return -ENOMEM;
- memcpy(ha->addr, addr, addr_len);
- ha->type = addr_type;
- ha->refcount = 1;
- ha->synced = false;
- list_add_tail_rcu(&ha->list, &list->list);
- list->count++;
- return 0;
-}
-
-static void ha_rcu_free(struct rcu_head *head)
-{
- struct netdev_hw_addr *ha;
-
- ha = container_of(head, struct netdev_hw_addr, rcu_head);
- kfree(ha);
-}
-
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
-
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- (ha->type == addr_type || !addr_type)) {
- if (--ha->refcount)
- return 0;
- list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
- list->count--;
- return 0;
- }
- }
- return -ENOENT;
-}
-
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len,
- unsigned char addr_type)
-{
- int err;
- struct netdev_hw_addr *ha, *ha2;
- unsigned char type;
-
- list_for_each_entry(ha, &from_list->list, list) {
- type = addr_type ? addr_type : ha->type;
- err = __hw_addr_add(to_list, ha->addr, addr_len, type);
- if (err)
- goto unroll;
- }
- return 0;
-
-unroll:
- list_for_each_entry(ha2, &from_list->list, list) {
- if (ha2 == ha)
- break;
- type = addr_type ? addr_type : ha2->type;
- __hw_addr_del(to_list, ha2->addr, addr_len, type);
- }
- return err;
-}
-
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len,
- unsigned char addr_type)
-{
- struct netdev_hw_addr *ha;
- unsigned char type;
-
- list_for_each_entry(ha, &from_list->list, list) {
- type = addr_type ? addr_type : ha->type;
- __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
- }
-}
-
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len)
-{
- int err = 0;
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (!ha->synced) {
- err = __hw_addr_add(to_list, ha->addr,
- addr_len, ha->type);
- if (err)
- break;
- ha->synced = true;
- ha->refcount++;
- } else if (ha->refcount == 1) {
- __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
- __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
- }
- }
- return err;
-}
-
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
- struct netdev_hw_addr_list *from_list,
- int addr_len)
-{
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (ha->synced) {
- __hw_addr_del(to_list, ha->addr,
- addr_len, ha->type);
- ha->synced = false;
- __hw_addr_del(from_list, ha->addr,
- addr_len, ha->type);
- }
- }
-}
-
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
-{
- struct netdev_hw_addr *ha, *tmp;
-
- list_for_each_entry_safe(ha, tmp, &list->list, list) {
- list_del_rcu(&ha->list);
- call_rcu(&ha->rcu_head, ha_rcu_free);
- }
- list->count = 0;
-}
-
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
-{
- INIT_LIST_HEAD(&list->list);
- list->count = 0;
-}
-
-/* Device addresses handling functions */
-
-static void dev_addr_flush(struct net_device *dev)
-{
- /* rtnl_mutex must be held here */
-
- __hw_addr_flush(&dev->dev_addrs);
- dev->dev_addr = NULL;
-}
-
-static int dev_addr_init(struct net_device *dev)
-{
- unsigned char addr[MAX_ADDR_LEN];
- struct netdev_hw_addr *ha;
- int err;
-
- /* rtnl_mutex must be held here */
-
- __hw_addr_init(&dev->dev_addrs);
- memset(addr, 0, sizeof(addr));
- err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
- NETDEV_HW_ADDR_T_LAN);
- if (!err) {
- /*
- * Get the first (previously created) address from the list
- * and set dev_addr pointer to this location.
- */
- ha = list_first_entry(&dev->dev_addrs.list,
- struct netdev_hw_addr, list);
- dev->dev_addr = ha->addr;
- }
- return err;
-}
-
-/**
- * dev_addr_add - Add a device address
- * @dev: device
- * @addr: address to add
- * @addr_type: address type
- *
- * Add a device address to the device or increase the reference count if
- * it already exists.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
- unsigned char addr_type)
-{
- int err;
-
- ASSERT_RTNL();
-
- err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_add);
-
-/**
- * dev_addr_del - Release a device address.
- * @dev: device
- * @addr: address to delete
- * @addr_type: address type
- *
- * Release reference to a device address and remove it from the device
- * if the reference count drops to zero.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
- unsigned char addr_type)
-{
- int err;
- struct netdev_hw_addr *ha;
-
- ASSERT_RTNL();
-
- /*
- * We can not remove the first address from the list because
- * dev->dev_addr points to that.
- */
- ha = list_first_entry(&dev->dev_addrs.list,
- struct netdev_hw_addr, list);
- if (ha->addr == dev->dev_addr && ha->refcount == 1)
- return -ENOENT;
-
- err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
- addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_del);
-
-/**
- * dev_addr_add_multiple - Add device addresses from another device
- * @to_dev: device to which addresses will be added
- * @from_dev: device from which addresses will be added
- * @addr_type: address type - 0 means type will be used from from_dev
- *
- * Add device addresses of the one device to another.
- **
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_add_multiple(struct net_device *to_dev,
- struct net_device *from_dev,
- unsigned char addr_type)
-{
- int err;
-
- ASSERT_RTNL();
-
- if (from_dev->addr_len != to_dev->addr_len)
- return -EINVAL;
- err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
- to_dev->addr_len, addr_type);
- if (!err)
- call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
- return err;
-}
-EXPORT_SYMBOL(dev_addr_add_multiple);
-
-/**
- * dev_addr_del_multiple - Delete device addresses by another device
- * @to_dev: device where the addresses will be deleted
- * @from_dev: device by which addresses the addresses will be deleted
- * @addr_type: address type - 0 means type will used from from_dev
- *
- * Deletes addresses in to device by the list of addresses in from device.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_addr_del_multiple(struct net_device *to_dev,
- struct net_device *from_dev,
- unsigned char addr_type)
-{
- ASSERT_RTNL();
-
- if (from_dev->addr_len != to_dev->addr_len)
- return -EINVAL;
- __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
- to_dev->addr_len, addr_type);
- call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
- return 0;
-}
-EXPORT_SYMBOL(dev_addr_del_multiple);
-
-/* multicast addresses handling functions */
-
-int __dev_addr_delete(struct dev_addr_list **list, int *count,
- void *addr, int alen, int glbl)
-{
- struct dev_addr_list *da;
-
- for (; (da = *list) != NULL; list = &da->next) {
- if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
- alen == da->da_addrlen) {
- if (glbl) {
- int old_glbl = da->da_gusers;
- da->da_gusers = 0;
- if (old_glbl == 0)
- break;
- }
- if (--da->da_users)
- return 0;
-
- *list = da->next;
- kfree(da);
- (*count)--;
- return 0;
- }
- }
- return -ENOENT;
-}
-
-int __dev_addr_add(struct dev_addr_list **list, int *count,
- void *addr, int alen, int glbl)
-{
- struct dev_addr_list *da;
-
- for (da = *list; da != NULL; da = da->next) {
- if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
- da->da_addrlen == alen) {
- if (glbl) {
- int old_glbl = da->da_gusers;
- da->da_gusers = 1;
- if (old_glbl)
- return 0;
- }
- da->da_users++;
- return 0;
- }
- }
-
- da = kzalloc(sizeof(*da), GFP_ATOMIC);
- if (da == NULL)
- return -ENOMEM;
- memcpy(da->da_addr, addr, alen);
- da->da_addrlen = alen;
- da->da_users = 1;
- da->da_gusers = glbl ? 1 : 0;
- da->next = *list;
- *list = da;
- (*count)++;
- return 0;
-}
-
-/**
- * dev_unicast_delete - Release secondary unicast address.
- * @dev: device
- * @addr: address to delete
- *
- * Release reference to a secondary unicast address and remove it
- * from the device if the reference count drops to zero.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_unicast_delete(struct net_device *dev, void *addr)
-{
- int err;
-
- ASSERT_RTNL();
-
- netif_addr_lock_bh(dev);
- err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_delete);
-
-/**
- * dev_unicast_add - add a secondary unicast address
- * @dev: device
- * @addr: address to add
- *
- * Add a secondary unicast address to the device or increase
- * the reference count if it already exists.
- *
- * The caller must hold the rtnl_mutex.
- */
-int dev_unicast_add(struct net_device *dev, void *addr)
-{
- int err;
-
- ASSERT_RTNL();
-
- netif_addr_lock_bh(dev);
- err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_add);
-
-int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
- struct dev_addr_list **from, int *from_count)
-{
- struct dev_addr_list *da, *next;
- int err = 0;
-
- da = *from;
- while (da != NULL) {
- next = da->next;
- if (!da->da_synced) {
- err = __dev_addr_add(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- if (err < 0)
- break;
- da->da_synced = 1;
- da->da_users++;
- } else if (da->da_users == 1) {
- __dev_addr_delete(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- __dev_addr_delete(from, from_count,
- da->da_addr, da->da_addrlen, 0);
- }
- da = next;
- }
- return err;
-}
-EXPORT_SYMBOL_GPL(__dev_addr_sync);
-
-void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
- struct dev_addr_list **from, int *from_count)
-{
- struct dev_addr_list *da, *next;
-
- da = *from;
- while (da != NULL) {
- next = da->next;
- if (da->da_synced) {
- __dev_addr_delete(to, to_count,
- da->da_addr, da->da_addrlen, 0);
- da->da_synced = 0;
- __dev_addr_delete(from, from_count,
- da->da_addr, da->da_addrlen, 0);
- }
- da = next;
- }
-}
-EXPORT_SYMBOL_GPL(__dev_addr_unsync);
-
-/**
- * dev_unicast_sync - Synchronize device's unicast list to another device
- * @to: destination device
- * @from: source device
- *
- * Add newly added addresses to the destination device and release
- * addresses that have no users left. The source device must be
- * locked by netif_tx_lock_bh.
- *
- * This function is intended to be called from the dev->set_rx_mode
- * function of layered software devices.
- */
-int dev_unicast_sync(struct net_device *to, struct net_device *from)
-{
- int err = 0;
-
- if (to->addr_len != from->addr_len)
- return -EINVAL;
-
- netif_addr_lock_bh(to);
- err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
- if (!err)
- __dev_set_rx_mode(to);
- netif_addr_unlock_bh(to);
- return err;
-}
-EXPORT_SYMBOL(dev_unicast_sync);
-
-/**
- * dev_unicast_unsync - Remove synchronized addresses from the destination device
- * @to: destination device
- * @from: source device
- *
- * Remove all addresses that were added to the destination device by
- * dev_unicast_sync(). This function is intended to be called from the
- * dev->stop function of layered software devices.
- */
-void dev_unicast_unsync(struct net_device *to, struct net_device *from)
-{
- if (to->addr_len != from->addr_len)
- return;
-
- netif_addr_lock_bh(from);
- netif_addr_lock(to);
- __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
- __dev_set_rx_mode(to);
- netif_addr_unlock(to);
- netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_unicast_unsync);
-
-static void dev_unicast_flush(struct net_device *dev)
-{
- netif_addr_lock_bh(dev);
- __hw_addr_flush(&dev->uc);
- netif_addr_unlock_bh(dev);
-}
-
-static void dev_unicast_init(struct net_device *dev)
-{
- __hw_addr_init(&dev->uc);
-}
-
-
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
- struct dev_addr_list *tmp;
-
- while (*list != NULL) {
- tmp = *list;
- *list = tmp->next;
- if (tmp->da_users > tmp->da_gusers)
- printk("__dev_addr_discard: address leakage! "
- "da_users=%d\n", tmp->da_users);
- kfree(tmp);
- }
-}
-
-static void dev_addr_discard(struct net_device *dev)
-{
- netif_addr_lock_bh(dev);
-
- __dev_addr_discard(&dev->mc_list);
- netdev_mc_count(dev) = 0;
-
- netif_addr_unlock_bh(dev);
-}
-
/**
* dev_get_flags - get flags reported to userspace
* @dev: device
@@ -4603,8 +4280,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
- dev->addr_len, 1);
+ return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCDELMULTI:
if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4612,8 +4288,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
- dev->addr_len, 1);
+ return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
@@ -4920,8 +4595,8 @@ static void rollback_registered_many(struct list_head *head)
/*
* Flush the unicast and multicast chains
*/
- dev_unicast_flush(dev);
- dev_addr_discard(dev);
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
@@ -5070,6 +4745,24 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
+#ifdef CONFIG_RPS
+ if (!dev->num_rx_queues) {
+ /*
+ * Allocate a single RX queue if driver never called
+ * alloc_netdev_mq
+ */
+
+ dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!dev->_rx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ dev->_rx->first = dev->_rx;
+ atomic_set(&dev->_rx->count, 1);
+ dev->num_rx_queues = 1;
+ }
+#endif
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -5430,6 +5123,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
+#ifdef CONFIG_RPS
+ struct netdev_rx_queue *rx;
+ int i;
+#endif
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -5455,13 +5152,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
goto free_p;
}
+#ifdef CONFIG_RPS
+ rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!rx) {
+ printk(KERN_ERR "alloc_netdev: Unable to allocate "
+ "rx queues.\n");
+ goto free_tx;
+ }
+
+ atomic_set(&rx->count, queue_count);
+
+ /*
+ * Set a pointer to first element in the array which holds the
+ * reference count.
+ */
+ for (i = 0; i < queue_count; i++)
+ rx[i].first = rx;
+#endif
+
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
if (dev_addr_init(dev))
- goto free_tx;
+ goto free_rx;
- dev_unicast_init(dev);
+ dev_mc_init(dev);
+ dev_uc_init(dev);
dev_net_set(dev, &init_net);
@@ -5469,6 +5185,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
+#ifdef CONFIG_RPS
+ dev->_rx = rx;
+ dev->num_rx_queues = queue_count;
+#endif
+
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev);
@@ -5483,9 +5204,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
strcpy(dev->name, name);
return dev;
+free_rx:
+#ifdef CONFIG_RPS
+ kfree(rx);
free_tx:
+#endif
kfree(tx);
-
free_p:
kfree(p);
return NULL;
@@ -5687,8 +5411,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/*
* Flush the unicast and multicast chains
*/
- dev_unicast_flush(dev);
- dev_addr_discard(dev);
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
netdev_unregister_kobject(dev);
@@ -5988,6 +5712,12 @@ static int __init net_dev_init(void)
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
+#ifdef CONFIG_RPS
+ queue->csd.func = trigger_softirq;
+ queue->csd.info = queue;
+ queue->csd.flags = 0;
+#endif
+
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
queue->backlog.gro_list = NULL;
@@ -6026,7 +5756,7 @@ subsys_initcall(net_dev_init);
static int __init initialize_hashrnd(void)
{
- get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+ get_random_bytes(&hashrnd, sizeof(hashrnd));
return 0;
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 0000000..508f9c1
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,741 @@
+/*
+ * net/core/dev_addr_lists.c - Functions for handling net device lists
+ * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This file contains functions for working with unicast, multicast and device
+ * addresses lists.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+
+/*
+ * General list handling functions
+ */
+
+/*
+ * Add @addr of type @addr_type to @list, or take another reference on an
+ * entry with the same address and type that is already present.
+ *
+ * When @global is set the entry is marked as globally used; if it already
+ * carries that mark nothing changes, so a global user never holds more
+ * than one reference.
+ *
+ * Returns 0 on success, -EINVAL if @addr_len exceeds MAX_ADDR_LEN and
+ * -ENOMEM if a new entry cannot be allocated.
+ */
+static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
+{
+	struct netdev_hw_addr *entry;
+	int size = sizeof(*entry);
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	/* Reuse an existing entry when address and type both match. */
+	list_for_each_entry(entry, &list->list, list) {
+		if (entry->type != addr_type ||
+		    memcmp(entry->addr, addr, addr_len))
+			continue;
+
+		if (global) {
+			/* check if addr is already used as global */
+			if (entry->global_use)
+				return 0;
+			entry->global_use = true;
+		}
+		entry->refcount++;
+		return 0;
+	}
+
+	/* Allocate at least L1_CACHE_BYTES for a new entry. */
+	if (size < L1_CACHE_BYTES)
+		size = L1_CACHE_BYTES;
+
+	entry = kmalloc(size, GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	memcpy(entry->addr, addr, addr_len);
+	entry->type = addr_type;
+	entry->refcount = 1;
+	entry->global_use = global;
+	entry->synced = false;
+	list_add_tail_rcu(&entry->list, &list->list);
+	list->count++;
+	return 0;
+}
+
+/* Add @addr to @list without marking it as globally used. */
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	const bool global = false;
+
+	return __hw_addr_add_ex(list, addr, addr_len, addr_type, global);
+}
+
+/* RCU callback: free the netdev_hw_addr that embeds @head. */
+static void ha_rcu_free(struct rcu_head *head)
+{
+	kfree(container_of(head, struct netdev_hw_addr, rcu_head));
+}
+
+/*
+ * Drop one reference to @addr in @list and unlink the entry once its
+ * reference count reaches zero (the memory is released via call_rcu()).
+ *
+ * An @addr_type of 0 matches entries of any type.  With @global set the
+ * entry must be marked as globally used; the mark is cleared together
+ * with the reference.
+ *
+ * Returns 0 on success, or -ENOENT if no entry matches or the first
+ * matching entry is not globally used although @global was requested.
+ */
+static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
+{
+	struct netdev_hw_addr *entry;
+
+	list_for_each_entry(entry, &list->list, list) {
+		if (memcmp(entry->addr, addr, addr_len))
+			continue;
+		if (addr_type && entry->type != addr_type)
+			continue;
+
+		if (global) {
+			if (!entry->global_use)
+				break;
+			entry->global_use = false;
+		}
+
+		if (--entry->refcount == 0) {
+			list_del_rcu(&entry->list);
+			call_rcu(&entry->rcu_head, ha_rcu_free);
+			list->count--;
+		}
+		return 0;
+	}
+	return -ENOENT;
+}
+
+/* Drop one non-global reference to @addr in @list. */
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	const bool global = false;
+
+	return __hw_addr_del_ex(list, addr, addr_len, addr_type, global);
+}
+
+/*
+ * Add every address of @from_list to @to_list.
+ *
+ * @addr_type overrides the type of the added entries; 0 keeps the type of
+ * each source entry.  If one addition fails, the additions made so far by
+ * this call are undone and the error is returned.
+ */
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *cur, *undo;
+	int err = 0;
+
+	list_for_each_entry(cur, &from_list->list, list) {
+		err = __hw_addr_add(to_list, cur->addr, addr_len,
+				    addr_type ? addr_type : cur->type);
+		if (err)
+			break;
+	}
+	if (!err)
+		return 0;
+
+	/* Roll back everything added before the failing entry. */
+	list_for_each_entry(undo, &from_list->list, list) {
+		if (undo == cur)
+			break;
+		__hw_addr_del(to_list, undo->addr, addr_len,
+			      addr_type ? addr_type : undo->type);
+	}
+	return err;
+}
+EXPORT_SYMBOL(__hw_addr_add_multiple);
+
+/*
+ * Drop, for every entry of @from_list, one reference to the same address
+ * in @to_list.
+ *
+ * @addr_type selects the type to match in @to_list; 0 means the type of
+ * the @from_list entry is used, mirroring __hw_addr_add_multiple().
+ * Addresses that are not present in @to_list are skipped silently.
+ */
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		/*
+		 * Pass the resolved type, not the raw @addr_type: a 0 is
+		 * treated by __hw_addr_del() as "any type" and could drop
+		 * a same-address entry of a different type.
+		 */
+		__hw_addr_del(to_list, ha->addr, addr_len, type);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_del_multiple);
+
+/*
+ * Bring @to_list in line with @from_list.
+ *
+ * Entries of @from_list that are not yet synced are added to @to_list,
+ * marked as synced and given one extra reference.  An already synced
+ * entry whose reference count is back at 1 is removed from both lists.
+ *
+ * Returns 0 on success or the error of the first failed addition; entries
+ * handled before the failure stay as they are.
+ */
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list,
+		   int addr_len)
+{
+	struct netdev_hw_addr *entry, *next;
+	int err = 0;
+
+	list_for_each_entry_safe(entry, next, &from_list->list, list) {
+		if (entry->synced) {
+			if (entry->refcount == 1) {
+				__hw_addr_del(to_list, entry->addr, addr_len,
+					      entry->type);
+				__hw_addr_del(from_list, entry->addr, addr_len,
+					      entry->type);
+			}
+			continue;
+		}
+
+		err = __hw_addr_add(to_list, entry->addr, addr_len,
+				    entry->type);
+		if (err)
+			break;
+		entry->synced = true;
+		entry->refcount++;
+	}
+	return err;
+}
+EXPORT_SYMBOL(__hw_addr_sync);
+
+/*
+ * Undo __hw_addr_sync(): for every synced entry of @from_list, drop the
+ * address from @to_list, clear the synced mark and release one reference
+ * on the @from_list entry itself.
+ */
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list,
+		      int addr_len)
+{
+	struct netdev_hw_addr *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &from_list->list, list) {
+		if (!entry->synced)
+			continue;
+
+		__hw_addr_del(to_list, entry->addr, addr_len, entry->type);
+		entry->synced = false;
+		__hw_addr_del(from_list, entry->addr, addr_len, entry->type);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_unsync);
+
+/*
+ * Unlink every entry of @list regardless of its reference count, queue
+ * each one for freeing via call_rcu(), and reset the entry counter.
+ */
+void __hw_addr_flush(struct netdev_hw_addr_list *list)
+{
+	struct netdev_hw_addr *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &list->list, list) {
+		list_del_rcu(&entry->list);
+		call_rcu(&entry->rcu_head, ha_rcu_free);
+	}
+
+	list->count = 0;
+}
+EXPORT_SYMBOL(__hw_addr_flush);
+
+/* Set up @list as an empty address list. */
+void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+	list->count = 0;
+	INIT_LIST_HEAD(&list->list);
+}
+EXPORT_SYMBOL(__hw_addr_init);
+
+/*
+ * Device addresses handling functions
+ */
+
+/**
+ * dev_addr_flush - Flush device address list
+ * @dev: device
+ *
+ * Removes every entry from dev->dev_addrs and resets ->dev_addr to NULL.
+ * dev_addr_init() must be called again before ->dev_addr can be used.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void dev_addr_flush(struct net_device *dev)
+{
+ /* rtnl_mutex must be held here */
+
+ __hw_addr_flush(&dev->dev_addrs);
+ dev->dev_addr = NULL;
+}
+EXPORT_SYMBOL(dev_addr_flush);
+
+/**
+ * dev_addr_init - Init device address list
+ * @dev: device
+ *
+ * Initialises dev->dev_addrs, inserts one all-zero address of type
+ * NETDEV_HW_ADDR_T_LAN and points ->dev_addr at that first entry.
+ *
+ * Returns 0 on success or the error from the list insertion; on error
+ * ->dev_addr is left untouched.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_init(struct net_device *dev)
+{
+	unsigned char zero_addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *first;
+	int err;
+
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_init(&dev->dev_addrs);
+	memset(zero_addr, 0, sizeof(zero_addr));
+	err = __hw_addr_add(&dev->dev_addrs, zero_addr, sizeof(zero_addr),
+			    NETDEV_HW_ADDR_T_LAN);
+	if (err)
+		return err;
+
+	/* The entry just created is the first one; expose its storage. */
+	first = list_first_entry(&dev->dev_addrs.list,
+				 struct netdev_hw_addr, list);
+	dev->dev_addr = first->addr;
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_init);
+
+/**
+ * dev_addr_add - Add a device address
+ * @dev: device
+ * @addr: address to add
+ * @addr_type: address type
+ *
+ * Adds @addr to the device address list, or takes another reference if
+ * the same address and type is already listed.  On success the
+ * NETDEV_CHANGEADDR notifier chain is called.
+ *
+ * Returns 0 on success or a negative errno.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (err)
+		return err;
+
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ * dev_addr_del - Release a device address.
+ * @dev: device
+ * @addr: address to delete
+ * @addr_type: address type
+ *
+ * Release reference to a device address and remove it from the device
+ * if the reference count drops to zero.  On success the
+ * NETDEV_CHANGEADDR notifier chain is called.
+ *
+ * Returns 0 on success, or -ENOENT if the address is not listed or if
+ * the request would remove the first list entry, which backs
+ * ->dev_addr.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha;
+
+	ASSERT_RTNL();
+
+	/*
+	 * We can not remove the first address from the list because
+	 * dev->dev_addr points to that.  Refuse only when the request
+	 * really targets that entry and would drop its last reference;
+	 * the match uses the same rule as __hw_addr_del() (a type of 0
+	 * matches any type).  Testing ha->addr == dev->dev_addr instead
+	 * would reject the deletion of every other address as well.
+	 */
+	ha = list_first_entry(&dev->dev_addrs.list,
+			      struct netdev_hw_addr, list);
+	if (!memcmp(ha->addr, addr, dev->addr_len) &&
+	    (ha->type == addr_type || !addr_type) &&
+	    ha->refcount == 1)
+		return -ENOENT;
+
+	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
+			    addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ * dev_addr_add_multiple - Add device addresses from another device
+ * @to_dev: device to which addresses will be added
+ * @from_dev: device from which addresses will be added
+ * @addr_type: address type - 0 means type will be used from from_dev
+ *
+ * Copies the device addresses of @from_dev to @to_dev.  Both devices
+ * must use the same address length, otherwise -EINVAL is returned.  On
+ * success the NETDEV_CHANGEADDR notifier chain is called for @to_dev.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (to_dev->addr_len != from_dev->addr_len)
+		return -EINVAL;
+
+	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+				     to_dev->addr_len, addr_type);
+	if (err)
+		return err;
+
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ * dev_addr_del_multiple - Delete device addresses by another device
+ * @to_dev: device where the addresses will be deleted
+ * @from_dev: device whose address list selects what is deleted
+ * @addr_type: address type - 0 means type will be used from from_dev
+ *
+ * Deletes from @to_dev the addresses listed on @from_dev.  Both devices
+ * must use the same address length, otherwise -EINVAL is returned.  The
+ * NETDEV_CHANGEADDR notifier chain is called for @to_dev whether or not
+ * any address was actually removed.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	ASSERT_RTNL();
+
+	if (to_dev->addr_len != from_dev->addr_len)
+		return -EINVAL;
+
+	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+			       to_dev->addr_len, addr_type);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/*
+ * Unicast list handling functions
+ */
+
+/**
+ * dev_uc_add - Add a secondary unicast address
+ * @dev: device
+ * @addr: address to add
+ *
+ * Adds @addr to the device's secondary unicast list, or takes another
+ * reference if it is already listed.  On success the RX mode of the
+ * device is updated.  Runs under the device address lock.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int dev_uc_add(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (err)
+		goto unlock;
+	__dev_set_rx_mode(dev);
+unlock:
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_add);
+
+/**
+ * dev_uc_del - Release secondary unicast address.
+ * @dev: device
+ * @addr: address to delete
+ *
+ * Drops one reference to @addr on the device's secondary unicast list
+ * and removes the entry when no reference is left.  On success the RX
+ * mode of the device is updated.  Runs under the device address lock.
+ *
+ * Returns 0 on success or -ENOENT if the address is not listed.
+ */
+int dev_uc_del(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (err)
+		goto unlock;
+	__dev_set_rx_mode(dev);
+unlock:
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_del);
+
+/**
+ * dev_uc_sync - Synchronize device's unicast list to another device
+ * @to: destination device
+ * @from: source device
+ *
+ * Adds addresses that are new on @from to @to and releases addresses
+ * that have no users left; on success the RX mode of @to is updated.
+ * Both devices must use the same address length, otherwise -EINVAL is
+ * returned.
+ *
+ * Only @to is locked here.  The source device must be locked by
+ * netif_tx_lock_bh.
+ * NOTE(review): dev_uc_unsync() protects @from with netif_addr_lock_bh();
+ * confirm which lock the caller is really expected to hold.
+ *
+ * This function is intended to be called from the dev->set_rx_mode
+ * function of layered software devices.
+ */
+int dev_uc_sync(struct net_device *to, struct net_device *from)
+{
+	int err;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
+	if (err)
+		goto unlock;
+	__dev_set_rx_mode(to);
+unlock:
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_sync);
+
+/**
+ * dev_uc_unsync - Remove synchronized addresses from the destination device
+ * @to: destination device
+ * @from: source device
+ *
+ * Remove all addresses that were added to the destination device by
+ * dev_uc_sync(). This function is intended to be called from the
+ * dev->stop function of layered software devices.
+ *
+ * Does nothing if the two devices use different address lengths.
+ * The address lock of @from is taken first (with BHs disabled), the
+ * address lock of @to is nested inside it; the RX mode of @to is
+ * updated before both locks are released in reverse order.
+ */
+void dev_uc_unsync(struct net_device *to, struct net_device *from)
+{
+ if (to->addr_len != from->addr_len)
+ return;
+
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
+ __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_uc_unsync);
+
+/**
+ * dev_uc_flush - Flush unicast addresses
+ * @dev: device
+ *
+ * Removes every entry from the device's secondary unicast list while
+ * holding the device address lock.  The RX mode of the device is not
+ * updated here.
+ */
+void dev_uc_flush(struct net_device *dev)
+{
+ netif_addr_lock_bh(dev);
+ __hw_addr_flush(&dev->uc);
+ netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_uc_flush);
+
+/**
+ * dev_uc_init - Init unicast address list
+ * @dev: device
+ *
+ * Sets up the device's secondary unicast list as an empty list.  No
+ * lock is taken.
+ */
+void dev_uc_init(struct net_device *dev)
+{
+ __hw_addr_init(&dev->uc);
+}
+EXPORT_SYMBOL(dev_uc_init);
+
+/*
+ * Multicast list handling functions
+ */
+
+/*
+ * Add multicast address @addr to @dev under the device address lock and,
+ * on success, update the RX mode of the device.  @global marks the entry
+ * as globally used (see __hw_addr_add_ex()).
+ */
+static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (err)
+		goto unlock;
+	__dev_set_rx_mode(dev);
+unlock:
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+/**
+ * dev_mc_add - Add a multicast address
+ * @dev: device
+ * @addr: address to add
+ *
+ * Add a multicast address to the device or increase
+ * the reference count if it already exists.  The entry is not
+ * marked as globally used.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int dev_mc_add(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_add(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_add);
+
+/**
+ * dev_mc_add_global - Add a global multicast address
+ * @dev: device
+ * @addr: address to add
+ *
+ * Add a global multicast address to the device.  If the address is
+ * already marked as globally used the call succeeds without taking
+ * another reference.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_add(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_add_global);
+
+/*
+ * Drop one reference to multicast address @addr on @dev under the device
+ * address lock and, on success, update the RX mode of the device.  With
+ * @global set the global-use mark of the entry is released (see
+ * __hw_addr_del_ex()).
+ */
+static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (err)
+		goto unlock;
+	__dev_set_rx_mode(dev);
+unlock:
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+
+/**
+ * dev_mc_del - Delete a multicast address.
+ * @dev: device
+ * @addr: address to delete
+ *
+ * Release reference to a multicast address and remove it
+ * from the device if the reference count drops to zero.
+ *
+ * Returns 0 on success or -ENOENT if the address is not listed.
+ */
+int dev_mc_del(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_del(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_del);
+
+/**
+ * dev_mc_del_global - Delete a global multicast address.
+ * @dev: device
+ * @addr: address to delete
+ *
+ * Release the global reference to a multicast address and remove it
+ * from the device if the reference count drops to zero.
+ *
+ * Returns 0 on success or -ENOENT if the address is not listed or is
+ * not marked as globally used.
+ */
+int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+{
+ return __dev_mc_del(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_del_global);
+
+/**
+ * dev_mc_sync - Synchronize device's multicast list to another device
+ * @to: destination device
+ * @from: source device
+ *
+ * Adds addresses that are new on @from to @to and releases addresses
+ * that have no users left; on success the RX mode of @to is updated.
+ * Both devices must use the same address length, otherwise -EINVAL is
+ * returned.
+ *
+ * Only @to is locked here.  The source device must be locked by
+ * netif_tx_lock_bh.
+ * NOTE(review): dev_mc_unsync() protects @from with netif_addr_lock_bh();
+ * confirm which lock the caller is really expected to hold.
+ *
+ * This function is intended to be called from the dev->set_multicast_list
+ * or dev->set_rx_mode function of layered software devices.
+ */
+int dev_mc_sync(struct net_device *to, struct net_device *from)
+{
+	int err;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+	if (err)
+		goto unlock;
+	__dev_set_rx_mode(to);
+unlock:
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_mc_sync);
+
+/**
+ * dev_mc_unsync - Remove synchronized addresses from the destination device
+ * @to: destination device
+ * @from: source device
+ *
+ * Remove all addresses that were added to the destination device by
+ * dev_mc_sync(). This function is intended to be called from the
+ * dev->stop function of layered software devices.
+ *
+ * Does nothing if the two devices use different address lengths.
+ * The address lock of @from is taken first (with BHs disabled), the
+ * address lock of @to is nested inside it; the RX mode of @to is
+ * updated before both locks are released in reverse order.
+ */
+void dev_mc_unsync(struct net_device *to, struct net_device *from)
+{
+ if (to->addr_len != from->addr_len)
+ return;
+
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
+ __hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_mc_unsync);
+
+/**
+ * dev_mc_flush - Flush multicast addresses
+ * @dev: device
+ *
+ * Removes every entry from the device's multicast list while holding
+ * the device address lock.  The RX mode of the device is not updated
+ * here.
+ */
+void dev_mc_flush(struct net_device *dev)
+{
+ netif_addr_lock_bh(dev);
+ __hw_addr_flush(&dev->mc);
+ netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_mc_flush);
+
+/**
+ * dev_mc_init - Init multicast address list
+ * @dev: device
+ *
+ * Sets up the device's multicast list as an empty list.  No lock is
+ * taken.
+ */
+void dev_mc_init(struct net_device *dev)
+{
+ __hw_addr_init(&dev->mc);
+}
+EXPORT_SYMBOL(dev_mc_init);
+
+#ifdef CONFIG_PROC_FS
+#include <linux/seq_file.h>
+
+/*
+ * seq_file ->show callback: print one line per multicast address of the
+ * device passed in @v (ifindex, name, reference count, global-use flag,
+ * then the address as hex bytes).  The start token produces no output.
+ */
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct net_device *dev = v;
+	struct netdev_hw_addr *ha;
+	int i;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	/* Keep the list stable while it is being printed. */
+	netif_addr_lock_bh(dev);
+	netdev_for_each_mc_addr(ha, dev) {
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, ha->refcount, ha->global_use);
+
+		for (i = 0; i < dev->addr_len; i++)
+			seq_printf(seq, "%02x", ha->addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_addr_unlock_bh(dev);
+
+	return 0;
+}
+
+/*
+ * Iterator for the "dev_mcast" proc file: the dev_seq_* helpers supply
+ * start/next/stop and dev_mc_seq_show() prints each device.
+ */
+static const struct seq_operations dev_mc_seq_ops = {
+ .start = dev_seq_start,
+ .next = dev_seq_next,
+ .stop = dev_seq_stop,
+ .show = dev_mc_seq_show,
+};
+
+/* ->open handler: open a seq_file that uses dev_mc_seq_ops via seq_open_net(). */
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &dev_mc_seq_ops,
+ sizeof(struct seq_net_private));
+}
+
+/* File operations registered for the "dev_mcast" proc entry. */
+static const struct file_operations dev_mc_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = dev_mc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+#endif
+
+/*
+ * Per-namespace init: create the "dev_mcast" proc entry for @net.
+ * Returns 0 on success and -ENOMEM if the entry could not be created.
+ */
+static int __net_init dev_mc_net_init(struct net *net)
+{
+	if (proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+		return 0;
+
+	return -ENOMEM;
+}
+
+/* Per-namespace exit: remove the "dev_mcast" proc entry of @net. */
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+ proc_net_remove(net, "dev_mcast");
+}
+
+/* Per-namespace hooks that create and remove the "dev_mcast" proc entry. */
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+ .init = dev_mc_net_init,
+ .exit = dev_mc_net_exit,
+};
+
+/*
+ * Register the per-namespace "dev_mcast" hooks.
+ * NOTE(review): the return value of register_pernet_subsys() is ignored.
+ */
+void __init dev_mcast_init(void)
+{
+ register_pernet_subsys(&dev_mc_net_ops);
+}
+
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 3dc295b..0000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Linux NET3: Multicast List maintenance.
- *
- * Authors:
- * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
- * Richard Underwood <richard@wuzz.demon.co.uk>
- *
- * Stir fried together from the IP multicast and CAP patches above
- * Alan Cox <alan@lxorguk.ukuu.org.uk>
- *
- * Fixes:
- * Alan Cox : Update the device on a real delete
- * rather than any time but...
- * Alan Cox : IFF_ALLMULTI support.
- * Alan Cox : New format set_multicast_list() calls.
- * Gleb Natapov : Remove dev_mc_lock.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
-
-
-/*
- * Device multicast list maintenance.
- *
- * This is used both by IP and by the user level maintenance functions.
- * Unlike BSD we maintain a usage count on a given multicast address so
- * that a casual user application can add/delete multicasts used by
- * protocols without doing damage to the protocols when it deletes the
- * entries. It also helps IP as it tracks overlapping maps.
- *
- * Device mc lists are changed by bh at least if IPv6 is enabled,
- * so that it must be bh protected.
- *
- * We block accesses to device mc filters with netif_tx_lock.
- */
-
-/*
- * Delete a device level multicast
- */
-
-int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
-{
- int err;
-
- netif_addr_lock_bh(dev);
- err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
- addr, alen, glbl);
- if (!err) {
- /*
- * We have altered the list, so the card
- * loaded filter is now wrong. Fix it
- */
-
- __dev_set_rx_mode(dev);
- }
- netif_addr_unlock_bh(dev);
- return err;
-}
-
-/*
- * Add a device level multicast
- */
-
-int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
-{
- int err;
-
- netif_addr_lock_bh(dev);
- if (alen != dev->addr_len)
- err = -EINVAL;
- else
- err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
- if (!err)
- __dev_set_rx_mode(dev);
- netif_addr_unlock_bh(dev);
- return err;
-}
-
-/**
- * dev_mc_sync - Synchronize device's multicast list to another device
- * @to: destination device
- * @from: source device
- *
- * Add newly added addresses to the destination device and release
- * addresses that have no users left. The source device must be
- * locked by netif_tx_lock_bh.
- *
- * This function is intended to be called from the dev->set_multicast_list
- * or dev->set_rx_mode function of layered software devices.
- */
-int dev_mc_sync(struct net_device *to, struct net_device *from)
-{
- int err = 0;
-
- netif_addr_lock_bh(to);
- err = __dev_addr_sync(&to->mc_list, &to->mc_count,
- &from->mc_list, &from->mc_count);
- if (!err)
- __dev_set_rx_mode(to);
- netif_addr_unlock_bh(to);
-
- return err;
-}
-EXPORT_SYMBOL(dev_mc_sync);
-
-
-/**
- * dev_mc_unsync - Remove synchronized addresses from the destination
- * device
- * @to: destination device
- * @from: source device
- *
- * Remove all addresses that were added to the destination device by
- * dev_mc_sync(). This function is intended to be called from the
- * dev->stop function of layered software devices.
- */
-void dev_mc_unsync(struct net_device *to, struct net_device *from)
-{
- netif_addr_lock_bh(from);
- netif_addr_lock(to);
-
- __dev_addr_unsync(&to->mc_list, &to->mc_count,
- &from->mc_list, &from->mc_count);
- __dev_set_rx_mode(to);
-
- netif_addr_unlock(to);
- netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_mc_unsync);
-
-#ifdef CONFIG_PROC_FS
-static int dev_mc_seq_show(struct seq_file *seq, void *v)
-{
- struct dev_addr_list *m;
- struct net_device *dev = v;
-
- if (v == SEQ_START_TOKEN)
- return 0;
-
- netif_addr_lock_bh(dev);
- for (m = dev->mc_list; m; m = m->next) {
- int i;
-
- seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
- dev->name, m->dmi_users, m->dmi_gusers);
-
- for (i = 0; i < m->dmi_addrlen; i++)
- seq_printf(seq, "%02x", m->dmi_addr[i]);
-
- seq_putc(seq, '\n');
- }
- netif_addr_unlock_bh(dev);
- return 0;
-}
-
-static const struct seq_operations dev_mc_seq_ops = {
- .start = dev_seq_start,
- .next = dev_seq_next,
- .stop = dev_seq_stop,
- .show = dev_mc_seq_show,
-};
-
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open_net(inode, file, &dev_mc_seq_ops,
- sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
- .owner = THIS_MODULE,
- .open = dev_mc_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_net,
-};
-
-#endif
-
-static int __net_init dev_mc_net_init(struct net *net)
-{
- if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
- return -ENOMEM;
- return 0;
-}
-
-static void __net_exit dev_mc_net_exit(struct net *net)
-{
- proc_net_remove(net, "dev_mcast");
-}
-
-static struct pernet_operations __net_initdata dev_mc_net_ops = {
- .init = dev_mc_net_init,
- .exit = dev_mc_net_exit,
-};
-
-void __init dev_mcast_init(void)
-{
- register_pernet_subsys(&dev_mc_net_ops);
-}
-
-EXPORT_SYMBOL(dev_mc_add);
-EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/core/dst.c b/net/core/dst.c
index f307bc1..b8c22f0 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -44,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0);
*/
static struct {
spinlock_t lock;
- struct dst_entry *list;
+ struct dst_entry *list;
unsigned long timer_inc;
unsigned long timer_expires;
} dst_garbage = {
@@ -52,7 +52,7 @@ static struct {
.timer_inc = DST_GC_MAX,
};
static void dst_gc_task(struct work_struct *work);
-static void ___dst_free(struct dst_entry * dst);
+static void ___dst_free(struct dst_entry *dst);
static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
@@ -136,8 +136,8 @@ loop:
}
expires = dst_garbage.timer_expires;
/*
- * if the next desired timer is more than 4 seconds in the future
- * then round the timer to whole seconds
+ * if the next desired timer is more than 4 seconds in the
+ * future then round the timer to whole seconds
*/
if (expires > 4*HZ)
expires = round_jiffies_relative(expires);
@@ -152,7 +152,8 @@ loop:
" expires: %lu elapsed: %lu us\n",
atomic_read(&dst_total), delayed, work_performed,
expires,
- elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC);
+ elapsed.tv_sec * USEC_PER_SEC +
+ elapsed.tv_nsec / NSEC_PER_USEC);
#endif
}
@@ -163,9 +164,9 @@ int dst_discard(struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard);
-void * dst_alloc(struct dst_ops * ops)
+void *dst_alloc(struct dst_ops *ops)
{
- struct dst_entry * dst;
+ struct dst_entry *dst;
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
if (ops->gc(ops))
@@ -185,19 +186,20 @@ void * dst_alloc(struct dst_ops * ops)
atomic_inc(&ops->entries);
return dst;
}
+EXPORT_SYMBOL(dst_alloc);
-static void ___dst_free(struct dst_entry * dst)
+static void ___dst_free(struct dst_entry *dst)
{
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
- if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
dst->input = dst->output = dst_discard;
- }
dst->obsolete = 2;
}
+EXPORT_SYMBOL(__dst_free);
-void __dst_free(struct dst_entry * dst)
+void __dst_free(struct dst_entry *dst)
{
spin_lock_bh(&dst_garbage.lock);
___dst_free(dst);
@@ -262,15 +264,16 @@ again:
}
return NULL;
}
+EXPORT_SYMBOL(dst_destroy);
void dst_release(struct dst_entry *dst)
{
if (dst) {
- int newrefcnt;
+ int newrefcnt;
smp_mb__before_atomic_dec();
- newrefcnt = atomic_dec_return(&dst->__refcnt);
- WARN_ON(newrefcnt < 0);
+ newrefcnt = atomic_dec_return(&dst->__refcnt);
+ WARN_ON(newrefcnt < 0);
}
}
EXPORT_SYMBOL(dst_release);
@@ -306,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
}
}
-static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+static int dst_dev_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
{
struct net_device *dev = ptr;
struct dst_entry *dst, *last = NULL;
@@ -329,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
last->next = dst;
else
dst_busy_list = dst;
- for (; dst; dst = dst->next) {
+ for (; dst; dst = dst->next)
dst_ifdown(dst, dev, event != NETDEV_DOWN);
- }
mutex_unlock(&dst_gc_mutex);
break;
}
@@ -346,7 +349,3 @@ void __init dst_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
-
-EXPORT_SYMBOL(__dst_free);
-EXPORT_SYMBOL(dst_alloc);
-EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9d55c57..1a7db92 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -18,8 +18,8 @@
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/bitops.h>
+#include <linux/uaccess.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
/*
* Some useful ethtool_ops methods that're device independent.
@@ -31,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
{
return netif_carrier_ok(dev) ? 1 : 0;
}
+EXPORT_SYMBOL(ethtool_op_get_link);
u32 ethtool_op_get_rx_csum(struct net_device *dev)
{
@@ -63,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
{
@@ -73,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
u32 ethtool_op_get_sg(struct net_device *dev)
{
return (dev->features & NETIF_F_SG) != 0;
}
+EXPORT_SYMBOL(ethtool_op_get_sg);
int ethtool_op_set_sg(struct net_device *dev, u32 data)
{
@@ -88,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_sg);
u32 ethtool_op_get_tso(struct net_device *dev)
{
return (dev->features & NETIF_F_TSO) != 0;
}
+EXPORT_SYMBOL(ethtool_op_get_tso);
int ethtool_op_set_tso(struct net_device *dev, u32 data)
{
@@ -103,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tso);
u32 ethtool_op_get_ufo(struct net_device *dev)
{
return (dev->features & NETIF_F_UFO) != 0;
}
+EXPORT_SYMBOL(ethtool_op_get_ufo);
int ethtool_op_set_ufo(struct net_device *dev, u32 data)
{
@@ -117,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
dev->features &= ~NETIF_F_UFO;
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_ufo);
/* the following list of flags are the same as their associated
* NETIF_F_xxx values in include/linux/netdevice.h
*/
static const u32 flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
+ (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
u32 ethtool_op_get_flags(struct net_device *dev)
{
@@ -133,6 +142,7 @@ u32 ethtool_op_get_flags(struct net_device *dev)
return dev->features & flags_dup_features;
}
+EXPORT_SYMBOL(ethtool_op_get_flags);
int ethtool_op_set_flags(struct net_device *dev, u32 data)
{
@@ -153,9 +163,15 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
features &= ~NETIF_F_NTUPLE;
}
+ if (data & ETH_FLAG_RXHASH)
+ features |= NETIF_F_RXHASH;
+ else
+ features &= ~NETIF_F_RXHASH;
+
dev->features = features;
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_flags);
void ethtool_ntuple_flush(struct net_device *dev)
{
@@ -201,7 +217,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_settings(dev, &cmd);
}
-static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -241,7 +258,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _
}
static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
- void __user *useraddr)
+ void __user *useraddr)
{
struct ethtool_sset_info info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -300,7 +317,8 @@ out:
return ret;
}
-static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_rxnfc cmd;
@@ -313,7 +331,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u
return dev->ethtool_ops->set_rxnfc(dev, &cmd);
}
-static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_rxnfc info;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -358,8 +377,8 @@ err_out:
}
static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
- struct ethtool_rx_ntuple_flow_spec *spec,
- struct ethtool_rx_ntuple_flow_spec_container *fsc)
+ struct ethtool_rx_ntuple_flow_spec *spec,
+ struct ethtool_rx_ntuple_flow_spec_container *fsc)
{
/* don't add filters forever */
@@ -385,7 +404,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
list->count++;
}
-static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_rx_ntuple cmd;
const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -510,125 +530,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
case UDP_V4_FLOW:
case SCTP_V4_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4src);
+ fsc->fs.h_u.tcp_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4src);
+ fsc->fs.m_u.tcp_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4dst);
+ fsc->fs.h_u.tcp_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4dst);
+ fsc->fs.m_u.tcp_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.psrc,
- fsc->fs.m_u.tcp_ip4_spec.psrc);
+ fsc->fs.h_u.tcp_ip4_spec.psrc,
+ fsc->fs.m_u.tcp_ip4_spec.psrc);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.pdst,
- fsc->fs.m_u.tcp_ip4_spec.pdst);
+ fsc->fs.h_u.tcp_ip4_spec.pdst,
+ fsc->fs.m_u.tcp_ip4_spec.pdst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.tos,
- fsc->fs.m_u.tcp_ip4_spec.tos);
+ fsc->fs.h_u.tcp_ip4_spec.tos,
+ fsc->fs.m_u.tcp_ip4_spec.tos);
p += ETH_GSTRING_LEN;
num_strings++;
break;
case AH_ESP_V4_FLOW:
case ESP_V4_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4src);
+ fsc->fs.h_u.ah_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4src);
+ fsc->fs.m_u.ah_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4dst);
+ fsc->fs.h_u.ah_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4dst);
+ fsc->fs.m_u.ah_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSPI: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.spi,
- fsc->fs.m_u.ah_ip4_spec.spi);
+ fsc->fs.h_u.ah_ip4_spec.spi,
+ fsc->fs.m_u.ah_ip4_spec.spi);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.tos,
- fsc->fs.m_u.ah_ip4_spec.tos);
+ fsc->fs.h_u.ah_ip4_spec.tos,
+ fsc->fs.m_u.ah_ip4_spec.tos);
p += ETH_GSTRING_LEN;
num_strings++;
break;
case IP_USER_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4src);
+ fsc->fs.h_u.raw_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4src);
+ fsc->fs.m_u.raw_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4dst);
+ fsc->fs.h_u.raw_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4dst);
+ fsc->fs.m_u.raw_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
break;
case IPV4_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
+ fsc->fs.h_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
+ fsc->fs.m_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
+ fsc->fs.h_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
+ fsc->fs.m_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
- fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
+ fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
+ fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.tos,
- fsc->fs.m_u.usr_ip4_spec.tos);
+ fsc->fs.h_u.usr_ip4_spec.tos,
+ fsc->fs.m_u.usr_ip4_spec.tos);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip_ver,
- fsc->fs.m_u.usr_ip4_spec.ip_ver);
+ fsc->fs.h_u.usr_ip4_spec.ip_ver,
+ fsc->fs.m_u.usr_ip4_spec.ip_ver);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.proto,
- fsc->fs.m_u.usr_ip4_spec.proto);
+ fsc->fs.h_u.usr_ip4_spec.proto,
+ fsc->fs.m_u.usr_ip4_spec.proto);
p += ETH_GSTRING_LEN;
num_strings++;
break;
};
sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
- fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
+ fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
@@ -641,7 +661,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
sprintf(p, "\tAction: Drop\n");
else
sprintf(p, "\tAction: Direct to queue %d\n",
- fsc->fs.action);
+ fsc->fs.action);
p += ETH_GSTRING_LEN;
num_strings++;
unknown_filter:
@@ -853,7 +873,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
return ret;
}
-static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
@@ -867,7 +888,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void
return 0;
}
-static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
+ void __user *useraddr)
{
struct ethtool_coalesce coalesce;
@@ -971,6 +993,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
{
@@ -1042,7 +1065,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
edata.data = dev->features & NETIF_F_GSO;
if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
+ return -EFAULT;
return 0;
}
@@ -1065,7 +1088,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
edata.data = dev->features & NETIF_F_GRO;
if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
+ return -EFAULT;
return 0;
}
@@ -1277,7 +1300,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
return actor(dev, edata.data);
}
-static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
+ char __user *useraddr)
{
struct ethtool_flash efl;
@@ -1306,11 +1330,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (!dev->ethtool_ops)
return -EOPNOTSUPP;
- if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+ if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
/* Allow some commands to be done by anyone */
- switch(ethcmd) {
+ switch (ethcmd) {
case ETHTOOL_GDRVINFO:
case ETHTOOL_GMSGLVL:
case ETHTOOL_GCOALESCE:
@@ -1338,10 +1362,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return -EPERM;
}
- if (dev->ethtool_ops->begin)
- if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+ if (dev->ethtool_ops->begin) {
+ rc = dev->ethtool_ops->begin(dev);
+ if (rc < 0)
return rc;
-
+ }
old_features = dev->features;
switch (ethcmd) {
@@ -1531,16 +1556,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return rc;
}
-
-EXPORT_SYMBOL(ethtool_op_get_link);
-EXPORT_SYMBOL(ethtool_op_get_sg);
-EXPORT_SYMBOL(ethtool_op_get_tso);
-EXPORT_SYMBOL(ethtool_op_set_sg);
-EXPORT_SYMBOL(ethtool_op_set_tso);
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-EXPORT_SYMBOL(ethtool_op_set_flags);
-EXPORT_SYMBOL(ethtool_op_get_flags);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d2c3e7d..05cce4e 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -109,7 +109,7 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
struct fib_rules_ops *ops;
int err;
- ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL);
+ ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
if (ops == NULL)
return ERR_PTR(-ENOMEM);
@@ -124,7 +124,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
return ops;
}
-
EXPORT_SYMBOL_GPL(fib_rules_register);
void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -158,7 +157,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
call_rcu(&ops->rcu, fib_rules_put_rcu);
}
-
EXPORT_SYMBOL_GPL(fib_rules_unregister);
static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
@@ -221,7 +219,6 @@ out:
return err;
}
-
EXPORT_SYMBOL_GPL(fib_rules_lookup);
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
@@ -614,7 +611,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
break;
cb->args[1] = 0;
- skip:
+skip:
idx++;
}
rcu_read_unlock();
@@ -686,7 +683,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
struct fib_rules_ops *ops;
ASSERT_RTNL();
- rcu_read_lock();
switch (event) {
case NETDEV_REGISTER:
@@ -700,8 +696,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
break;
}
- rcu_read_unlock();
-
return NOTIFY_DONE;
}
diff --git a/net/core/flow.c b/net/core/flow.c
index 9601587..1619006 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,113 +26,158 @@
#include <linux/security.h>
struct flow_cache_entry {
- struct flow_cache_entry *next;
- u16 family;
- u8 dir;
- u32 genid;
- struct flowi key;
- void *object;
- atomic_t *object_ref;
+ union {
+ struct hlist_node hlist;
+ struct list_head gc_list;
+ } u;
+ u16 family;
+ u8 dir;
+ u32 genid;
+ struct flowi key;
+ struct flow_cache_object *object;
};
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-
-static u32 flow_hash_shift;
-#define flow_hash_size (1 << flow_hash_shift)
-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
-
-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-
-static struct kmem_cache *flow_cachep __read_mostly;
+struct flow_cache_percpu {
+ struct hlist_head *hash_table;
+ int hash_count;
+ u32 hash_rnd;
+ int hash_rnd_recalc;
+ struct tasklet_struct flush_tasklet;
+};
-static int flow_lwm, flow_hwm;
+struct flow_flush_info {
+ struct flow_cache *cache;
+ atomic_t cpuleft;
+ struct completion completion;
+};
-struct flow_percpu_info {
- int hash_rnd_recalc;
- u32 hash_rnd;
- int count;
+struct flow_cache {
+ u32 hash_shift;
+ unsigned long order;
+ struct flow_cache_percpu *percpu;
+ struct notifier_block hotcpu_notifier;
+ int low_watermark;
+ int high_watermark;
+ struct timer_list rnd_timer;
};
-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
-#define flow_hash_rnd_recalc(cpu) \
- (per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
-#define flow_hash_rnd(cpu) \
- (per_cpu(flow_hash_info, cpu).hash_rnd)
-#define flow_count(cpu) \
- (per_cpu(flow_hash_info, cpu).count)
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+static struct flow_cache flow_cache_global;
+static struct kmem_cache *flow_cachep;
-static struct timer_list flow_hash_rnd_timer;
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
-
-struct flow_flush_info {
- atomic_t cpuleft;
- struct completion completion;
-};
-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
-
-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
+#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
static void flow_cache_new_hashrnd(unsigned long arg)
{
+ struct flow_cache *fc = (void *) arg;
int i;
for_each_possible_cpu(i)
- flow_hash_rnd_recalc(i) = 1;
+ per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&flow_hash_rnd_timer);
+ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+ add_timer(&fc->rnd_timer);
+}
+
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+ if (atomic_read(&flow_cache_genid) != fle->genid)
+ return 0;
+ if (fle->object && !fle->object->ops->check(fle->object))
+ return 0;
+ return 1;
}
-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle)
{
if (fle->object)
- atomic_dec(fle->object_ref);
+ fle->object->ops->delete(fle->object);
kmem_cache_free(flow_cachep, fle);
- flow_count(cpu)--;
}
-static void __flow_cache_shrink(int cpu, int shrink_to)
+static void flow_cache_gc_task(struct work_struct *work)
{
- struct flow_cache_entry *fle, **flp;
- int i;
+ struct list_head gc_list;
+ struct flow_cache_entry *fce, *n;
- for (i = 0; i < flow_hash_size; i++) {
- int k = 0;
+ INIT_LIST_HEAD(&gc_list);
+ spin_lock_bh(&flow_cache_gc_lock);
+ list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+ spin_unlock_bh(&flow_cache_gc_lock);
- flp = &flow_table(cpu)[i];
- while ((fle = *flp) != NULL && k < shrink_to) {
- k++;
- flp = &fle->next;
- }
- while ((fle = *flp) != NULL) {
- *flp = fle->next;
- flow_entry_kill(cpu, fle);
- }
+ list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+ flow_entry_kill(fce);
+}
+static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
+
+static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
+ int deleted, struct list_head *gc_list)
+{
+ if (deleted) {
+ fcp->hash_count -= deleted;
+ spin_lock_bh(&flow_cache_gc_lock);
+ list_splice_tail(gc_list, &flow_cache_gc_list);
+ spin_unlock_bh(&flow_cache_gc_lock);
+ schedule_work(&flow_cache_gc_work);
}
}
-static void flow_cache_shrink(int cpu)
+static void __flow_cache_shrink(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp,
+ int shrink_to)
{
- int shrink_to = flow_lwm / flow_hash_size;
+ struct flow_cache_entry *fle;
+ struct hlist_node *entry, *tmp;
+ LIST_HEAD(gc_list);
+ int i, deleted = 0;
+
+ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ int saved = 0;
+
+ hlist_for_each_entry_safe(fle, entry, tmp,
+ &fcp->hash_table[i], u.hlist) {
+ if (saved < shrink_to &&
+ flow_entry_valid(fle)) {
+ saved++;
+ } else {
+ deleted++;
+ hlist_del(&fle->u.hlist);
+ list_add_tail(&fle->u.gc_list, &gc_list);
+ }
+ }
+ }
- __flow_cache_shrink(cpu, shrink_to);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list);
}
-static void flow_new_hash_rnd(int cpu)
+static void flow_cache_shrink(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp)
{
- get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
- flow_hash_rnd_recalc(cpu) = 0;
+ int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
- __flow_cache_shrink(cpu, 0);
+ __flow_cache_shrink(fc, fcp, shrink_to);
}
-static u32 flow_hash_code(struct flowi *key, int cpu)
+static void flow_new_hash_rnd(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp)
+{
+ get_random_bytes(&fcp->hash_rnd, sizeof(u32));
+ fcp->hash_rnd_recalc = 0;
+ __flow_cache_shrink(fc, fcp, 0);
+}
+
+static u32 flow_hash_code(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp,
+ struct flowi *key)
{
u32 *k = (u32 *) key;
- return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
- (flow_hash_size - 1));
+ return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+ & (flow_cache_hash_size(fc) - 1));
}
#if (BITS_PER_LONG == 64)
@@ -165,114 +210,117 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
return 0;
}
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
- flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ flow_resolve_t resolver, void *ctx)
{
- struct flow_cache_entry *fle, **head;
+ struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache_percpu *fcp;
+ struct flow_cache_entry *fle, *tfle;
+ struct hlist_node *entry;
+ struct flow_cache_object *flo;
unsigned int hash;
- int cpu;
local_bh_disable();
- cpu = smp_processor_id();
+ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
fle = NULL;
+ flo = NULL;
/* Packet really early in init? Making flow_cache_init a
* pre-smp initcall would solve this. --RR */
- if (!flow_table(cpu))
+ if (!fcp->hash_table)
goto nocache;
- if (flow_hash_rnd_recalc(cpu))
- flow_new_hash_rnd(cpu);
- hash = flow_hash_code(key, cpu);
+ if (fcp->hash_rnd_recalc)
+ flow_new_hash_rnd(fc, fcp);
- head = &flow_table(cpu)[hash];
- for (fle = *head; fle; fle = fle->next) {
- if (fle->family == family &&
- fle->dir == dir &&
- flow_key_compare(key, &fle->key) == 0) {
- if (fle->genid == atomic_read(&flow_cache_genid)) {
- void *ret = fle->object;
-
- if (ret)
- atomic_inc(fle->object_ref);
- local_bh_enable();
-
- return ret;
- }
+ hash = flow_hash_code(fc, fcp, key);
+ hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
+ if (tfle->family == family &&
+ tfle->dir == dir &&
+ flow_key_compare(key, &tfle->key) == 0) {
+ fle = tfle;
break;
}
}
- if (!fle) {
- if (flow_count(cpu) > flow_hwm)
- flow_cache_shrink(cpu);
+ if (unlikely(!fle)) {
+ if (fcp->hash_count > fc->high_watermark)
+ flow_cache_shrink(fc, fcp);
fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
if (fle) {
- fle->next = *head;
- *head = fle;
fle->family = family;
fle->dir = dir;
memcpy(&fle->key, key, sizeof(*key));
fle->object = NULL;
- flow_count(cpu)++;
+ hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
+ fcp->hash_count++;
}
+ } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+ flo = fle->object;
+ if (!flo)
+ goto ret_object;
+ flo = flo->ops->get(flo);
+ if (flo)
+ goto ret_object;
+ } else if (fle->object) {
+ flo = fle->object;
+ flo->ops->delete(flo);
+ fle->object = NULL;
}
nocache:
- {
- int err;
- void *obj;
- atomic_t *obj_ref;
-
- err = resolver(net, key, family, dir, &obj, &obj_ref);
-
- if (fle && !err) {
- fle->genid = atomic_read(&flow_cache_genid);
-
- if (fle->object)
- atomic_dec(fle->object_ref);
-
- fle->object = obj;
- fle->object_ref = obj_ref;
- if (obj)
- atomic_inc(fle->object_ref);
- }
- local_bh_enable();
-
- if (err)
- obj = ERR_PTR(err);
- return obj;
+ flo = NULL;
+ if (fle) {
+ flo = fle->object;
+ fle->object = NULL;
}
+ flo = resolver(net, key, family, dir, flo, ctx);
+ if (fle) {
+ fle->genid = atomic_read(&flow_cache_genid);
+ if (!IS_ERR(flo))
+ fle->object = flo;
+ else
+ fle->genid--;
+ } else {
+ if (flo && !IS_ERR(flo))
+ flo->ops->delete(flo);
+ }
+ret_object:
+ local_bh_enable();
+ return flo;
}
static void flow_cache_flush_tasklet(unsigned long data)
{
struct flow_flush_info *info = (void *)data;
- int i;
- int cpu;
-
- cpu = smp_processor_id();
- for (i = 0; i < flow_hash_size; i++) {
- struct flow_cache_entry *fle;
-
- fle = flow_table(cpu)[i];
- for (; fle; fle = fle->next) {
- unsigned genid = atomic_read(&flow_cache_genid);
-
- if (!fle->object || fle->genid == genid)
+ struct flow_cache *fc = info->cache;
+ struct flow_cache_percpu *fcp;
+ struct flow_cache_entry *fle;
+ struct hlist_node *entry, *tmp;
+ LIST_HEAD(gc_list);
+ int i, deleted = 0;
+
+ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ hlist_for_each_entry_safe(fle, entry, tmp,
+ &fcp->hash_table[i], u.hlist) {
+ if (flow_entry_valid(fle))
continue;
- fle->object = NULL;
- atomic_dec(fle->object_ref);
+ deleted++;
+ hlist_del(&fle->u.hlist);
+ list_add_tail(&fle->u.gc_list, &gc_list);
}
}
+ flow_cache_queue_garbage(fcp, deleted, &gc_list);
+
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
}
-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
static void flow_cache_flush_per_cpu(void *data)
{
struct flow_flush_info *info = data;
@@ -280,8 +328,7 @@ static void flow_cache_flush_per_cpu(void *data)
struct tasklet_struct *tasklet;
cpu = smp_processor_id();
-
- tasklet = flow_flush_tasklet(cpu);
+ tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
@@ -294,6 +341,7 @@ void flow_cache_flush(void)
/* Don't want cpus going down or up during this. */
get_online_cpus();
mutex_lock(&flow_flush_sem);
+ info.cache = &flow_cache_global;
atomic_set(&info.cpuleft, num_online_cpus());
init_completion(&info.completion);
@@ -307,62 +355,75 @@ void flow_cache_flush(void)
put_online_cpus();
}
-static void __init flow_cache_cpu_prepare(int cpu)
+static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp)
{
- struct tasklet_struct *tasklet;
- unsigned long order;
-
- for (order = 0;
- (PAGE_SIZE << order) <
- (sizeof(struct flow_cache_entry *)*flow_hash_size);
- order++)
- /* NOTHING */;
-
- flow_table(cpu) = (struct flow_cache_entry **)
- __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
- if (!flow_table(cpu))
- panic("NET: failed to allocate flow cache order %lu\n", order);
-
- flow_hash_rnd_recalc(cpu) = 1;
- flow_count(cpu) = 0;
-
- tasklet = flow_flush_tasklet(cpu);
- tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+ fcp->hash_table = (struct hlist_head *)
+ __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
+ if (!fcp->hash_table)
+ panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+
+ fcp->hash_rnd_recalc = 1;
+ fcp->hash_count = 0;
+ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
}
static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
+ struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+ int cpu = (unsigned long) hcpu;
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
- __flow_cache_shrink((unsigned long)hcpu, 0);
+ __flow_cache_shrink(fc, fcp, 0);
return NOTIFY_OK;
}
-static int __init flow_cache_init(void)
+static int flow_cache_init(struct flow_cache *fc)
{
+ unsigned long order;
int i;
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC,
- NULL);
- flow_hash_shift = 10;
- flow_lwm = 2 * flow_hash_size;
- flow_hwm = 4 * flow_hash_size;
+ fc->hash_shift = 10;
+ fc->low_watermark = 2 * flow_cache_hash_size(fc);
+ fc->high_watermark = 4 * flow_cache_hash_size(fc);
+
+ for (order = 0;
+ (PAGE_SIZE << order) <
+ (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
+ order++)
+ /* NOTHING */;
+ fc->order = order;
+ fc->percpu = alloc_percpu(struct flow_cache_percpu);
- setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&flow_hash_rnd_timer);
+ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+ (unsigned long) fc);
+ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+ add_timer(&fc->rnd_timer);
for_each_possible_cpu(i)
- flow_cache_cpu_prepare(i);
+ flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
+
+ fc->hotcpu_notifier = (struct notifier_block){
+ .notifier_call = flow_cache_cpu,
+ };
+ register_hotcpu_notifier(&fc->hotcpu_notifier);
- hotcpu_notifier(flow_cache_cpu, 0);
return 0;
}
-module_init(flow_cache_init);
+static int __init flow_cache_init_global(void)
+{
+ flow_cachep = kmem_cache_create("flow_cache",
+ sizeof(struct flow_cache_entry),
+ 0, SLAB_PANIC, NULL);
+
+ return flow_cache_init(&flow_cache_global);
+}
+
+module_init(flow_cache_init_global);
EXPORT_SYMBOL(flow_cache_genid);
EXPORT_SYMBOL(flow_cache_lookup);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 59cfc7d..96ed690 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -467,6 +467,217 @@ static struct attribute_group wireless_group = {
};
#endif
+#ifdef CONFIG_RPS
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attr, char *buf);
+ ssize_t (*store)(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_rx_queue_attr(_attr) container_of(_attr, \
+ struct rx_queue_attribute, attr)
+
+#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
+
+static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+ if (!attribute->store)
+ return -EIO;
+
+ return attribute->store(queue, attribute, buf, count);
+}
+
+static struct sysfs_ops rx_queue_sysfs_ops = {
+ .show = rx_queue_attr_show,
+ .store = rx_queue_attr_store,
+};
+
+static ssize_t show_rps_map(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attribute, char *buf)
+{
+ struct rps_map *map;
+ cpumask_var_t mask;
+ size_t len = 0;
+ int i;
+
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ rcu_read_lock();
+ map = rcu_dereference(queue->rps_map);
+ if (map)
+ for (i = 0; i < map->len; i++)
+ cpumask_set_cpu(map->cpus[i], mask);
+
+ len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+ if (PAGE_SIZE - len < 3) {
+ rcu_read_unlock();
+ free_cpumask_var(mask);
+ return -EINVAL;
+ }
+ rcu_read_unlock();
+
+ free_cpumask_var(mask);
+ len += sprintf(buf + len, "\n");
+ return len;
+}
+
+static void rps_map_release(struct rcu_head *rcu)
+{
+ struct rps_map *map = container_of(rcu, struct rps_map, rcu);
+
+ kfree(map);
+}
+
+ssize_t store_rps_map(struct netdev_rx_queue *queue,
+ struct rx_queue_attribute *attribute,
+ const char *buf, size_t len)
+{
+ struct rps_map *old_map, *map;
+ cpumask_var_t mask;
+ int err, cpu, i;
+ static DEFINE_SPINLOCK(rps_map_lock);
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+ if (err) {
+ free_cpumask_var(mask);
+ return err;
+ }
+
+ map = kzalloc(max_t(unsigned,
+ RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+ GFP_KERNEL);
+ if (!map) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+
+ i = 0;
+ for_each_cpu_and(cpu, mask, cpu_online_mask)
+ map->cpus[i++] = cpu;
+
+ if (i)
+ map->len = i;
+ else {
+ kfree(map);
+ map = NULL;
+ }
+
+ spin_lock(&rps_map_lock);
+ old_map = queue->rps_map;
+ rcu_assign_pointer(queue->rps_map, map);
+ spin_unlock(&rps_map_lock);
+
+ if (old_map)
+ call_rcu(&old_map->rcu, rps_map_release);
+
+ free_cpumask_var(mask);
+ return len;
+}
+
+static struct rx_queue_attribute rps_cpus_attribute =
+ __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+static struct attribute *rx_queue_default_attrs[] = {
+ &rps_cpus_attribute.attr,
+ NULL
+};
+
+static void rx_queue_release(struct kobject *kobj)
+{
+ struct netdev_rx_queue *queue = to_rx_queue(kobj);
+ struct rps_map *map = queue->rps_map;
+ struct netdev_rx_queue *first = queue->first;
+
+ if (map)
+ call_rcu(&map->rcu, rps_map_release);
+
+ if (atomic_dec_and_test(&first->count))
+ kfree(first);
+}
+
+static struct kobj_type rx_queue_ktype = {
+ .sysfs_ops = &rx_queue_sysfs_ops,
+ .release = rx_queue_release,
+ .default_attrs = rx_queue_default_attrs,
+};
+
+static int rx_queue_add_kobject(struct net_device *net, int index)
+{
+ struct netdev_rx_queue *queue = net->_rx + index;
+ struct kobject *kobj = &queue->kobj;
+ int error = 0;
+
+ kobj->kset = net->queues_kset;
+ error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+ "rx-%u", index);
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
+
+ kobject_uevent(kobj, KOBJ_ADD);
+
+ return error;
+}
+
+static int rx_queue_register_kobjects(struct net_device *net)
+{
+ int i;
+ int error = 0;
+
+ net->queues_kset = kset_create_and_add("queues",
+ NULL, &net->dev.kobj);
+ if (!net->queues_kset)
+ return -ENOMEM;
+ for (i = 0; i < net->num_rx_queues; i++) {
+ error = rx_queue_add_kobject(net, i);
+ if (error)
+ break;
+ }
+
+ if (error)
+ while (--i >= 0)
+ kobject_put(&net->_rx[i].kobj);
+
+ return error;
+}
+
+static void rx_queue_remove_kobjects(struct net_device *net)
+{
+ int i;
+
+ for (i = 0; i < net->num_rx_queues; i++)
+ kobject_put(&net->_rx[i].kobj);
+ kset_unregister(net->queues_kset);
+}
+#endif /* CONFIG_RPS */
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_HOTPLUG
@@ -530,6 +741,10 @@ void netdev_unregister_kobject(struct net_device * net)
if (!net_eq(dev_net(net), &init_net))
return;
+#ifdef CONFIG_RPS
+ rx_queue_remove_kobjects(net);
+#endif
+
device_del(dev);
}
@@ -538,6 +753,7 @@ int netdev_register_kobject(struct net_device *net)
{
struct device *dev = &(net->dev);
const struct attribute_group **groups = net->sysfs_groups;
+ int error = 0;
dev->class = &net_class;
dev->platform_data = net;
@@ -564,7 +780,19 @@ int netdev_register_kobject(struct net_device *net)
if (!net_eq(dev_net(net), &init_net))
return 0;
- return device_add(dev);
+ error = device_add(dev);
+ if (error)
+ return error;
+
+#ifdef CONFIG_RPS
+ error = rx_queue_register_kobjects(net);
+ if (error) {
+ device_del(dev);
+ return error;
+ }
+#endif
+
+ return error;
}
int netdev_class_create_file(struct class_attribute *class_attr)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4392381..2ad68da 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -169,7 +169,7 @@
#include <asm/dma.h>
#include <asm/div64.h> /* do_div */
-#define VERSION "2.72"
+#define VERSION "2.73"
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -190,6 +190,7 @@
#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
+#define F_NODE (1<<15) /* Node memory alloc*/
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -372,6 +373,7 @@ struct pktgen_dev {
u16 queue_map_min;
u16 queue_map_max;
+ int node; /* Memory node */
#ifdef CONFIG_XFRM
__u8 ipsmode; /* IPSEC mode (config) */
@@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->traffic_class)
seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
+ if (pkt_dev->node >= 0)
+ seq_printf(seq, " node: %d\n", pkt_dev->node);
+
seq_printf(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
@@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_SVID_RND)
seq_printf(seq, "SVID_RND ");
+ if (pkt_dev->flags & F_NODE)
+ seq_printf(seq, "NODE_ALLOC ");
+
seq_puts(seq, "\n");
/* not really stopped, more like last-running-at */
@@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->dst_mac_count);
return count;
}
+ if (!strcmp(name, "node")) {
+ len = num_arg(&user_buffer[i], 10, &value);
+ if (len < 0)
+ return len;
+
+ i += len;
+
+ if (node_possible(value)) {
+ pkt_dev->node = value;
+ sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+ }
+ else
+ sprintf(pg_result, "ERROR: node not possible");
+ return count;
+ }
if (!strcmp(name, "flag")) {
char f[32];
memset(f, 0, 32);
@@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!IPV6") == 0)
pkt_dev->flags &= ~F_IPV6;
+ else if (strcmp(f, "NODE_ALLOC") == 0)
+ pkt_dev->flags |= F_NODE;
+
+ else if (strcmp(f, "!NODE_ALLOC") == 0)
+ pkt_dev->flags &= ~F_NODE;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
- "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n");
+ "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
return count;
}
sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mod_cur_headers(pkt_dev);
datalen = (odev->hard_header_len + 16) & ~0xf;
- skb = __netdev_alloc_skb(odev,
- pkt_dev->cur_pkt_size + 64
- + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
+ if (pkt_dev->flags & F_NODE) {
+ int node;
+
+ if (pkt_dev->node >= 0)
+ node = pkt_dev->node;
+ else
+ node = numa_node_id();
+
+ skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
+ + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
+ if (likely(skb)) {
+ skb_reserve(skb, NET_SKB_PAD);
+ skb->dev = odev;
+ }
+ }
+ else
+ skb = __netdev_alloc_skb(odev,
+ pkt_dev->cur_pkt_size + 64
+ + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
@@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->svlan_p = 0;
pkt_dev->svlan_cfi = 0;
pkt_dev->svlan_id = 0xffff;
+ pkt_dev->node = -1;
err = pktgen_setup_dev(pkt_dev, ifname);
if (err)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4568120..bf919b6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -600,7 +600,41 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->rx_compressed = b->rx_compressed;
a->tx_compressed = b->tx_compressed;
-};
+}
+
+static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b)
+{
+ struct rtnl_link_stats64 a;
+
+ a.rx_packets = b->rx_packets;
+ a.tx_packets = b->tx_packets;
+ a.rx_bytes = b->rx_bytes;
+ a.tx_bytes = b->tx_bytes;
+ a.rx_errors = b->rx_errors;
+ a.tx_errors = b->tx_errors;
+ a.rx_dropped = b->rx_dropped;
+ a.tx_dropped = b->tx_dropped;
+
+ a.multicast = b->multicast;
+ a.collisions = b->collisions;
+
+ a.rx_length_errors = b->rx_length_errors;
+ a.rx_over_errors = b->rx_over_errors;
+ a.rx_crc_errors = b->rx_crc_errors;
+ a.rx_frame_errors = b->rx_frame_errors;
+ a.rx_fifo_errors = b->rx_fifo_errors;
+ a.rx_missed_errors = b->rx_missed_errors;
+
+ a.tx_aborted_errors = b->tx_aborted_errors;
+ a.tx_carrier_errors = b->tx_carrier_errors;
+ a.tx_fifo_errors = b->tx_fifo_errors;
+ a.tx_heartbeat_errors = b->tx_heartbeat_errors;
+ a.tx_window_errors = b->tx_window_errors;
+
+ a.rx_compressed = b->rx_compressed;
+ a.tx_compressed = b->tx_compressed;
+ memcpy(v, &a, sizeof(a));
+}
static inline int rtnl_vfinfo_size(const struct net_device *dev)
{
@@ -619,6 +653,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
+ nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
+ nla_total_size(sizeof(struct rtnl_link_ifmap))
+ nla_total_size(sizeof(struct rtnl_link_stats))
+ + nla_total_size(sizeof(struct rtnl_link_stats64))
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
+ nla_total_size(4) /* IFLA_TXQLEN */
@@ -698,6 +733,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
stats = dev_get_stats(dev);
copy_rtnl_link_stats(nla_data(attr), stats);
+ attr = nla_reserve(skb, IFLA_STATS64,
+ sizeof(struct rtnl_link_stats64));
+ if (attr == NULL)
+ goto nla_put_failure;
+ copy_rtnl_link_stats64(nla_data(attr), stats);
+
if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
int i;
struct ifla_vf_info ivi;
@@ -1473,6 +1514,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_POST_INIT:
case NETDEV_REGISTER:
case NETDEV_CHANGE:
+ case NETDEV_PRE_TYPE_CHANGE:
case NETDEV_GOING_DOWN:
case NETDEV_UNREGISTER:
case NETDEV_UNREGISTER_BATCH:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e06..bdea0ef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->network_header = old->network_header;
new->mac_header = old->mac_header;
skb_dst_set(new, dst_clone(skb_dst(old)));
+ new->rxhash = old->rxhash;
#ifdef CONFIG_XFRM
new->sp = secpath_get(old->sp);
#endif
@@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
C(len);
C(data_len);
C(mac_len);
+ C(rxhash);
n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
n->cloned = 1;
n->nohdr = 0;
OpenPOWER on IntegriCloud