summaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c294
1 files changed, 226 insertions, 68 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fba549..ea24693 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -115,6 +115,8 @@
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
/*
* The list of packet types we will receive (as opposed to discard)
@@ -148,6 +150,12 @@ static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
+#ifdef CONFIG_NET_DMA
+static struct dma_client *net_dma_client;
+static unsigned int net_dma_count;
+static spinlock_t net_dma_event_lock;
+#endif
+
/*
* The @dev_base list is protected by @dev_base_lock and the rtnl
* semaphore.
@@ -1041,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
* taps currently in use.
*/
-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
@@ -1179,6 +1187,40 @@ out:
return ret;
}
+/**
+ * skb_gso_segment - Perform segmentation on skb.
+ * @skb: buffer to segment
+ * @sg: whether scatter-gather is supported on the target.
+ *
+ * This function segments the given skb and returns a list of segments.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_type *ptype;
+ int type = skb->protocol;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+ BUG_ON(skb->ip_summed != CHECKSUM_HW);
+
+ skb->mac.raw = skb->data;
+ skb->mac_len = skb->nh.raw - skb->data;
+ __skb_pull(skb, skb->mac_len);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+ if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+ segs = ptype->gso_segment(skb, sg);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+
+EXPORT_SYMBOL(skb_gso_segment);
+
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
@@ -1215,75 +1257,95 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
#define illegal_highdma(dev, skb) (0)
#endif
-/* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
+struct dev_gso_cb {
+ void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
+static void dev_gso_skb_destructor(struct sk_buff *skb)
{
- unsigned int size;
- u8 *data;
- long offset;
- struct skb_shared_info *ninfo;
- int headerlen = skb->data - skb->head;
- int expand = (skb->tail + skb->data_len) - skb->end;
+ struct dev_gso_cb *cb;
- if (skb_shared(skb))
- BUG();
+ do {
+ struct sk_buff *nskb = skb->next;
- if (expand <= 0)
- expand = 0;
+ skb->next = nskb->next;
+ nskb->next = NULL;
+ kfree_skb(nskb);
+ } while (skb->next);
- size = skb->end - skb->head + expand;
- size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
- if (!data)
- return -ENOMEM;
+ cb = DEV_GSO_CB(skb);
+ if (cb->destructor)
+ cb->destructor(skb);
+}
+
+/**
+ * dev_gso_segment - Perform emulated hardware segmentation on skb.
+ * @skb: buffer to segment
+ *
+ * This function segments the given skb and stores the list of segments
+ * in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct sk_buff *segs;
- /* Copy entire thing */
- if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
- BUG();
+ segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
+ !illegal_highdma(dev, skb));
+ if (unlikely(IS_ERR(segs)))
+ return PTR_ERR(segs);
- /* Set up shinfo */
- ninfo = (struct skb_shared_info*)(data + size);
- atomic_set(&ninfo->dataref, 1);
- ninfo->tso_size = skb_shinfo(skb)->tso_size;
- ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
- ninfo->nr_frags = 0;
- ninfo->frag_list = NULL;
+ skb->next = segs;
+ DEV_GSO_CB(skb)->destructor = skb->destructor;
+ skb->destructor = dev_gso_skb_destructor;
- /* Offset between the two in bytes */
- offset = data - skb->head;
+ return 0;
+}
- /* Free old data. */
- skb_release_data(skb);
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ if (likely(!skb->next)) {
+ if (netdev_nit)
+ dev_queue_xmit_nit(skb, dev);
- skb->head = data;
- skb->end = data + size;
+ if (!netif_needs_gso(dev, skb))
+ return dev->hard_start_xmit(skb, dev);
+
+ if (unlikely(dev_gso_segment(skb)))
+ goto out_kfree_skb;
+ }
- /* Set up new pointers */
- skb->h.raw += offset;
- skb->nh.raw += offset;
- skb->mac.raw += offset;
- skb->tail += offset;
- skb->data += offset;
+ do {
+ struct sk_buff *nskb = skb->next;
+ int rc;
- /* We are no longer a clone, even if we were. */
- skb->cloned = 0;
+ skb->next = nskb->next;
+ nskb->next = NULL;
+ rc = dev->hard_start_xmit(nskb, dev);
+ if (unlikely(rc)) {
+ skb->next = nskb;
+ return rc;
+ }
+ } while (skb->next);
+
+ skb->destructor = DEV_GSO_CB(skb)->destructor;
- skb->tail += skb->data_len;
- skb->data_len = 0;
+out_kfree_skb:
+ kfree_skb(skb);
return 0;
}
#define HARD_TX_LOCK(dev, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
- spin_lock(&dev->xmit_lock); \
- dev->xmit_lock_owner = cpu; \
+ netif_tx_lock(dev); \
} \
}
#define HARD_TX_UNLOCK(dev) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
- dev->xmit_lock_owner = -1; \
- spin_unlock(&dev->xmit_lock); \
+ netif_tx_unlock(dev); \
} \
}
@@ -1319,9 +1381,13 @@ int dev_queue_xmit(struct sk_buff *skb)
struct Qdisc *q;
int rc = -ENOMEM;
+ /* GSO will handle the following emulations directly. */
+ if (netif_needs_gso(dev, skb))
+ goto gso;
+
if (skb_shinfo(skb)->frag_list &&
!(dev->features & NETIF_F_FRAGLIST) &&
- __skb_linearize(skb, GFP_ATOMIC))
+ __skb_linearize(skb))
goto out_kfree_skb;
/* Fragmented skb is linearized if device does not support SG,
@@ -1330,25 +1396,26 @@ int dev_queue_xmit(struct sk_buff *skb)
*/
if (skb_shinfo(skb)->nr_frags &&
(!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
- __skb_linearize(skb, GFP_ATOMIC))
+ __skb_linearize(skb))
goto out_kfree_skb;
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
if (skb->ip_summed == CHECKSUM_HW &&
- (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+ (!(dev->features & NETIF_F_GEN_CSUM) &&
(!(dev->features & NETIF_F_IP_CSUM) ||
skb->protocol != htons(ETH_P_IP))))
if (skb_checksum_help(skb, 0))
goto out_kfree_skb;
+gso:
spin_lock_prefetch(&dev->queue_lock);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
- local_bh_disable();
+ rcu_read_lock_bh();
/* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
@@ -1382,8 +1449,8 @@ int dev_queue_xmit(struct sk_buff *skb)
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
- Really, it is unlikely that xmit_lock protection is necessary here.
- (f.e. loopback and IP tunnels are clean ignoring statistics
+ Really, it is unlikely that netif_tx_lock protection is necessary
+ here. (f.e. loopback and IP tunnels are clean ignoring statistics
counters.)
However, it is possible, that they rely on protection
made by us here.
@@ -1399,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb)
HARD_TX_LOCK(dev, cpu);
if (!netif_queue_stopped(dev)) {
- if (netdev_nit)
- dev_queue_xmit_nit(skb, dev);
-
rc = 0;
- if (!dev->hard_start_xmit(skb, dev)) {
+ if (!dev_hard_start_xmit(skb, dev)) {
HARD_TX_UNLOCK(dev);
goto out;
}
@@ -1422,13 +1486,13 @@ int dev_queue_xmit(struct sk_buff *skb)
}
rc = -ENETDOWN;
- local_bh_enable();
+ rcu_read_unlock_bh();
out_kfree_skb:
kfree_skb(skb);
return rc;
out:
- local_bh_enable();
+ rcu_read_unlock_bh();
return rc;
}
@@ -1846,6 +1910,19 @@ static void net_rx_action(struct softirq_action *h)
}
}
out:
+#ifdef CONFIG_NET_DMA
+ /*
+ * There may not be any more sk_buffs coming right now, so push
+ * any pending DMA copies to hardware
+ */
+ if (net_dma_client) {
+ struct dma_chan *chan;
+ rcu_read_lock();
+ list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
+ dma_async_memcpy_issue_pending(chan);
+ rcu_read_unlock();
+ }
+#endif
local_irq_enable();
return;
@@ -2785,7 +2862,7 @@ int register_netdevice(struct net_device *dev)
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
spin_lock_init(&dev->queue_lock);
- spin_lock_init(&dev->xmit_lock);
+ spin_lock_init(&dev->_xmit_lock);
dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_CLS_ACT
spin_lock_init(&dev->ingress_lock);
@@ -2829,9 +2906,7 @@ int register_netdevice(struct net_device *dev)
/* Fix illegal SG+CSUM combinations. */
if ((dev->features & NETIF_F_SG) &&
- !(dev->features & (NETIF_F_IP_CSUM |
- NETIF_F_NO_CSUM |
- NETIF_F_HW_CSUM))) {
+ !(dev->features & NETIF_F_ALL_CSUM)) {
printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
dev->name);
dev->features &= ~NETIF_F_SG;
@@ -3022,7 +3097,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
- struct list_head list = LIST_HEAD_INIT(list);
+ struct list_head list;
/* Need to guard against multiple cpu's getting out of order. */
mutex_lock(&net_todo_run_mutex);
@@ -3037,9 +3112,9 @@ void netdev_run_todo(void)
/* Snapshot list, allow later requests */
spin_lock(&net_todo_list_lock);
- list_splice_init(&net_todo_list, &list);
+ list_replace_init(&net_todo_list, &list);
spin_unlock(&net_todo_list_lock);
-
+
while (!list_empty(&list)) {
struct net_device *dev
= list_entry(list.next, struct net_device, todo_list);
@@ -3300,6 +3375,88 @@ static int dev_cpu_callback(struct notifier_block *nfb,
}
#endif /* CONFIG_HOTPLUG_CPU */
+#ifdef CONFIG_NET_DMA
+/**
+ * net_dma_rebalance -
+ * This is called when the number of channels allocated to the net_dma_client
+ * changes. The net_dma_client tries to have one DMA channel per CPU.
+ */
+static void net_dma_rebalance(void)
+{
+ unsigned int cpu, i, n;
+ struct dma_chan *chan;
+
+ lock_cpu_hotplug();
+
+ if (net_dma_count == 0) {
+ for_each_online_cpu(cpu)
+ rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL);
+ unlock_cpu_hotplug();
+ return;
+ }
+
+ i = 0;
+ cpu = first_cpu(cpu_online_map);
+
+ rcu_read_lock();
+ list_for_each_entry(chan, &net_dma_client->channels, client_node) {
+ n = ((num_online_cpus() / net_dma_count)
+ + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
+
+ while(n) {
+ per_cpu(softnet_data.net_dma, cpu) = chan;
+ cpu = next_cpu(cpu, cpu_online_map);
+ n--;
+ }
+ i++;
+ }
+ rcu_read_unlock();
+
+ unlock_cpu_hotplug();
+}
+
+/**
+ * netdev_dma_event - event callback for the net_dma_client
+ * @client: should always be net_dma_client
+ * @chan: DMA channel for the event
+ * @event: event type
+ */
+static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
+ enum dma_event event)
+{
+ spin_lock(&net_dma_event_lock);
+ switch (event) {
+ case DMA_RESOURCE_ADDED:
+ net_dma_count++;
+ net_dma_rebalance();
+ break;
+ case DMA_RESOURCE_REMOVED:
+ net_dma_count--;
+ net_dma_rebalance();
+ break;
+ default:
+ break;
+ }
+ spin_unlock(&net_dma_event_lock);
+}
+
+/**
+ * netdev_dma_regiser - register the networking subsystem as a DMA client
+ */
+static int __init netdev_dma_register(void)
+{
+ spin_lock_init(&net_dma_event_lock);
+ net_dma_client = dma_async_client_register(netdev_dma_event);
+ if (net_dma_client == NULL)
+ return -ENOMEM;
+
+ dma_async_client_chan_request(net_dma_client, num_online_cpus());
+ return 0;
+}
+
+#else
+static int __init netdev_dma_register(void) { return -ENODEV; }
+#endif /* CONFIG_NET_DMA */
/*
* Initialize the DEV module. At boot time this walks the device list and
@@ -3353,6 +3510,8 @@ static int __init net_dev_init(void)
atomic_set(&queue->backlog_dev.refcnt, 1);
}
+ netdev_dma_register();
+
dev_boot_phase = 0;
open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
@@ -3371,7 +3530,6 @@ subsys_initcall(net_dev_init);
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
-EXPORT_SYMBOL(__skb_linearize);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
OpenPOWER on IntegriCloud