From 8728c544a9cbdcb0034aa5c45706c5f953f030ee Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Sun, 11 Apr 2010 21:18:17 +0000
Subject: net: dev_pick_tx() fix

When dev_pick_tx() caches a tx queue_index on a socket, we must check
that the socket's dst_entry matches the skb's, or we risk a crash later,
as reported by Denys Fedorysychenko, if old packets are still in flight
during a route change involving devices with different numbers of queues.

Bug introduced by commit a4ee3ce3 (net: Use sk_tx_queue_mapping for
connected sockets)

Reported-by: Denys Fedorysychenko
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1c8a0ce..92584bf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1989,8 +1989,12 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
             if (dev->real_num_tx_queues > 1)
                 queue_index = skb_tx_hash(dev, skb);
 
-            if (sk && sk->sk_dst_cache)
-                sk_tx_queue_set(sk, queue_index);
+            if (sk) {
+                struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache);
+
+                if (dst && skb_dst(skb) == dst)
+                    sk_tx_queue_set(sk, queue_index);
+            }
         }
     }
--
cgit v1.1
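The guard added above can be pictured with a small stand-alone C model. This is illustrative only: struct route, struct conn and struct pkt are simplified stand-ins for the kernel's dst_entry, sock and sk_buff, not real kernel API. The cached queue index is trusted and refreshed only while the packet still uses the route the socket has cached, so an index computed against the old device cannot outlive a route change.

#include <stdio.h>

/* Simplified stand-ins for struct dst_entry, struct sock and struct sk_buff. */
struct route { int nr_queues; };
struct conn  { struct route *cached_route; int cached_queue; }; /* -1 = unset */
struct pkt   { struct route *route; unsigned int hash; };

static int pick_tx_queue(struct conn *c, struct pkt *p)
{
    /* Reuse the cached index only while the packet still uses the cached route. */
    if (c && c->cached_queue >= 0 && c->cached_route == p->route)
        return c->cached_queue;

    int queue = (int)(p->hash % (unsigned int)p->route->nr_queues);

    /* Cache only when the routes match, mirroring the dst check in the patch. */
    if (c && c->cached_route == p->route)
        c->cached_queue = queue;
    return queue;
}

int main(void)
{
    struct route old_route = { .nr_queues = 8 }, new_route = { .nr_queues = 2 };
    struct conn c = { .cached_route = &old_route, .cached_queue = 5 };
    struct pkt late = { .route = &new_route, .hash = 12345 };

    /* The stale index 5 is not reused against the 2-queue device. */
    printf("queue = %d\n", pick_tx_queue(&c, &late));
    return 0;
}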
From 05d17608a69b3ae653ea5c9857283bef3439c733 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 20 Apr 2010 00:25:58 +0000
Subject: net: Fix an RCU warning in dev_pick_tx()

Fix the following RCU warning in dev_pick_tx():

===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
net/core/dev.c:1993 invoked rcu_dereference_check() without protection!

other info that might help us debug this:

rcu_scheduler_active = 1, debug_locks = 0
2 locks held by swapper/0:
 #0:  (&idev->mc_ifc_timer){+.-...}, at: [] run_timer_softirq+0x17b/0x278
 #1:  (rcu_read_lock_bh){.+....}, at: [] dev_queue_xmit+0x14e/0x4dc

stack backtrace:
Pid: 0, comm: swapper Not tainted 2.6.34-rc5-cachefs #4
Call Trace:
 [] lockdep_rcu_dereference+0xaa/0xb2
 [] dev_queue_xmit+0x259/0x4dc
 [] ? dev_queue_xmit+0x14e/0x4dc
 [] ? trace_hardirqs_on+0xd/0xf
 [] ? local_bh_enable_ip+0xbc/0xc1
 [] neigh_resolve_output+0x24b/0x27c
 [] ip6_output_finish+0x7c/0xb4
 [] ip6_output2+0x256/0x261
 [] ? trace_hardirqs_on+0xd/0xf
 [] ip6_output+0xbbc/0xbcb
 [] ? fib6_force_start_gc+0x2b/0x2d
 [] mld_sendpack+0x273/0x39d
 [] ? mld_sendpack+0x0/0x39d
 [] ? mark_held_locks+0x52/0x70
 [] mld_ifc_timer_expire+0x24f/0x288
 [] run_timer_softirq+0x1ec/0x278
 [] ? run_timer_softirq+0x17b/0x278
 [] ? mld_ifc_timer_expire+0x0/0x288
 [] ? __do_softirq+0x69/0x140
 [] __do_softirq+0xa2/0x140
 [] call_softirq+0x1c/0x28
 [] do_softirq+0x38/0x80
 [] irq_exit+0x45/0x47
 [] smp_apic_timer_interrupt+0x88/0x96
 [] apic_timer_interrupt+0x13/0x20
 [] ? __atomic_notifier_call_chain+0x0/0x86
 [] ? mwait_idle+0x6e/0x78
 [] ? mwait_idle+0x65/0x78
 [] cpu_idle+0x4d/0x83
 [] rest_init+0xb9/0xc0
 [] ? rest_init+0x0/0xc0
 [] start_kernel+0x392/0x39d
 [] x86_64_start_reservations+0xb3/0xb7
 [] x86_64_start_kernel+0xe4/0xeb

The rcu_dereference() should be an rcu_dereference_bh(), since
dev_queue_xmit() runs under rcu_read_lock_bh() (lock #1 above).

Signed-off-by: David Howells
Acked-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 92584bf..f769098 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1990,7 +1990,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                 queue_index = skb_tx_hash(dev, skb);
 
             if (sk) {
-                struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache);
+                struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache);
 
                 if (dst && skb_dst(skb) == dst)
                     sk_tx_queue_set(sk, queue_index);
--
cgit v1.1

From 9a20e3197e7f6097897c6d1f18335a326ee06299 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 20 Apr 2010 20:08:36 +0000
Subject: net: Introduce skb_orphan_try()

At this point, skb->destructor is not the original one (it has been
stored in DEV_GSO_CB(skb)->destructor), so the skb_orphan_try() call
on each GSO segment is dropped.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index e904c47..9bf1ccc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1937,7 +1937,6 @@ gso:
         if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
             skb_dst_drop(nskb);
 
-        skb_orphan_try(nskb);
         rc = ops->ndo_start_xmit(nskb, dev);
         if (unlikely(rc != NETDEV_TX_OK)) {
             if (rc & ~NETDEV_TX_MASK)
--
cgit v1.1
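Beyond the RCU flavour, the fixed dev_pick_tx() code also shows a pattern worth noting: the shared pointer is loaded exactly once into a local (dst) and every later test uses that local. A user-space analogy of that load-once pattern, using C11 atomics rather than RCU (all names here are illustrative, and this does not model the kernel's _bh read-side flavour):

#include <stdatomic.h>
#include <stdio.h>

struct route { int id; };

/* Shared pointer updated by a writer (analogue of sk->sk_dst_cache). */
static _Atomic(struct route *) cached_route;

static int route_matches(struct route *pkt_route)
{
    /* Load the shared pointer exactly once into a local, then test and use
     * that local, just as the patch loads sk_dst_cache into 'dst' before
     * comparing it with skb_dst(skb). Re-reading the shared pointer could
     * observe two different values. */
    struct route *r = atomic_load_explicit(&cached_route, memory_order_acquire);

    return r && r == pkt_route;
}

int main(void)
{
    struct route a = { .id = 1 };

    atomic_store_explicit(&cached_route, &a, memory_order_release);
    printf("match: %d\n", route_matches(&a));
    return 0;
}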
From e326bed2f47d0365da5a8faaf8ee93ed2d86325b Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 22 Apr 2010 00:22:45 -0700
Subject: rps: immediate send IPI in process_backlog()

If some skbs are queued to our backlog, delaying the IPI send until the
end of net_rx_action() increases latencies. This defeats the queueing,
since we want to quickly dispatch packets to the pool of worker cpus and
only then do the deeper processing ourselves.

It's better to send the IPIs from process_backlog(), before processing
our packets in upper layers.

Also rename the _and_irq_disable suffix to _and_irq_enable, since we
enable local irqs in net_rps_action(); sorry for the confusion.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 76 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 42 insertions(+), 34 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 9bf1ccc..3ba774b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3242,11 +3242,48 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
+/*
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
+ */
+static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+    struct softnet_data *remsd = sd->rps_ipi_list;
+
+    if (remsd) {
+        sd->rps_ipi_list = NULL;
+
+        local_irq_enable();
+
+        /* Send pending IPI's to kick RPS processing on remote cpus. */
+        while (remsd) {
+            struct softnet_data *next = remsd->rps_ipi_next;
+
+            if (cpu_online(remsd->cpu))
+                __smp_call_function_single(remsd->cpu,
+                                           &remsd->csd, 0);
+            remsd = next;
+        }
+    } else
+#endif
+        local_irq_enable();
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
     int work = 0;
     struct softnet_data *sd = &__get_cpu_var(softnet_data);
 
+#ifdef CONFIG_RPS
+    /* Check if we have pending ipi, its better to send them now,
+     * not waiting net_rx_action() end.
+     */
+    if (sd->rps_ipi_list) {
+        local_irq_disable();
+        net_rps_action_and_irq_enable(sd);
+    }
+#endif
     napi->weight = weight_p;
     do {
         struct sk_buff *skb;
@@ -3353,45 +3390,16 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-/*
- * net_rps_action sends any pending IPI's for rps.
- * Note: called with local irq disabled, but exits with local irq enabled.
- */
-static void net_rps_action_and_irq_disable(void)
-{
-#ifdef CONFIG_RPS
-    struct softnet_data *sd = &__get_cpu_var(softnet_data);
-    struct softnet_data *remsd = sd->rps_ipi_list;
-
-    if (remsd) {
-        sd->rps_ipi_list = NULL;
-
-        local_irq_enable();
-
-        /* Send pending IPI's to kick RPS processing on remote cpus. */
-        while (remsd) {
-            struct softnet_data *next = remsd->rps_ipi_next;
-
-            if (cpu_online(remsd->cpu))
-                __smp_call_function_single(remsd->cpu,
-                                           &remsd->csd, 0);
-            remsd = next;
-        }
-    } else
-#endif
-        local_irq_enable();
-}
-
 static void net_rx_action(struct softirq_action *h)
 {
-    struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
+    struct softnet_data *sd = &__get_cpu_var(softnet_data);
     unsigned long time_limit = jiffies + 2;
     int budget = netdev_budget;
     void *have;
 
     local_irq_disable();
 
-    while (!list_empty(list)) {
+    while (!list_empty(&sd->poll_list)) {
         struct napi_struct *n;
         int work, weight;
 
@@ -3409,7 +3417,7 @@ static void net_rx_action(struct softirq_action *h)
          * entries to the tail of this list, and only ->poll()
          * calls can remove this head entry from the list.
          */
-        n = list_first_entry(list, struct napi_struct, poll_list);
+        n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
 
         have = netpoll_poll_lock(n);
 
@@ -3444,13 +3452,13 @@ static void net_rx_action(struct softirq_action *h)
             napi_complete(n);
             local_irq_disable();
         } else
-            list_move_tail(&n->poll_list, list);
+            list_move_tail(&n->poll_list, &sd->poll_list);
         }
 
         netpoll_poll_unlock(have);
     }
 
 out:
-    net_rps_action_and_irq_disable();
+    net_rps_action_and_irq_enable(sd);
 
 #ifdef CONFIG_NET_DMA
     /*
--
cgit v1.1

From 9ccb8975940c4ee51161152e37058e3d9e06c62f Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 22 Apr 2010 01:02:07 -0700
Subject: net: Orphan and de-dst skbs earlier in xmit path.

This way GSO packets don't get handled differently from non-GSO packets.

With help from Eric Dumazet.

Signed-off-by: David S. Miller
Signed-off-by: Eric Dumazet
---
 net/core/dev.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 3ba774b..a4a7c36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1902,13 +1902,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
         if (!list_empty(&ptype_all))
             dev_queue_xmit_nit(skb, dev);
 
-        if (netif_needs_gso(dev, skb)) {
-            if (unlikely(dev_gso_segment(skb)))
-                goto out_kfree_skb;
-            if (skb->next)
-                goto gso;
-        }
-
         /*
          * If device doesnt need skb->dst, release it right now while
          * its hot in this cpu cache
@@ -1917,6 +1910,14 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
             skb_dst_drop(skb);
 
         skb_orphan_try(skb);
+
+        if (netif_needs_gso(dev, skb)) {
+            if (unlikely(dev_gso_segment(skb)))
+                goto out_kfree_skb;
+            if (skb->next)
+                goto gso;
+        }
+
         rc = ops->ndo_start_xmit(skb, dev);
         if (rc == NETDEV_TX_OK)
             txq_trans_update(txq);
--
cgit v1.1
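The net_rps_action_and_irq_enable() helper above follows a common pattern: detach the whole pending list first, then walk the private copy, so entries queued meanwhile are neither lost nor visited twice. A minimal user-space sketch of that pattern, with hypothetical names standing in for rps_ipi_list and the IPI:

#include <stdio.h>

/* Stand-in for a remote softnet_data entry queued on rps_ipi_list. */
struct pending {
    struct pending *next;
    int cpu;
};

static struct pending *pending_list;    /* analogue of sd->rps_ipi_list */

static void notify(int cpu)
{
    printf("kick cpu %d\n", cpu);       /* stands in for sending the IPI */
}

/* Detach the whole list first, then walk the private copy. New entries can
 * be queued again while we are still notifying, without being lost. */
static void flush_pending(void)
{
    struct pending *p = pending_list;

    pending_list = NULL;
    while (p) {
        struct pending *next = p->next;

        notify(p->cpu);
        p = next;
    }
}

int main(void)
{
    struct pending b = { NULL, 2 }, a = { &b, 1 };

    pending_list = &a;
    flush_pending();
    return 0;
}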
From 8c52d509e84bbf26cffb8b6e75b399689af67885 Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Sat, 24 Apr 2010 22:50:10 -0700
Subject: rps: optimize rps_get_cpu()

Optimize get_rps_cpu(): don't initialize ports when we can read them
from the packet, and fetch both ports with one memory access instead
of two.

Signed-off-by: Changli Gao
Signed-off-by: David S. Miller
---
 net/core/dev.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index a4a7c36..4d43f1a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2229,7 +2229,11 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
     int cpu = -1;
     u8 ip_proto;
     u16 tcpu;
-    u32 addr1, addr2, ports, ihl;
+    u32 addr1, addr2, ihl;
+    union {
+        u32 v32;
+        u16 v16[2];
+    } ports;
 
     if (skb_rx_queue_recorded(skb)) {
         u16 index = skb_get_rx_queue(skb);
@@ -2275,7 +2279,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
     default:
         goto done;
     }
-    ports = 0;
     switch (ip_proto) {
     case IPPROTO_TCP:
     case IPPROTO_UDP:
@@ -2285,25 +2288,20 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
     case IPPROTO_SCTP:
     case IPPROTO_UDPLITE:
         if (pskb_may_pull(skb, (ihl * 4) + 4)) {
-            __be16 *hports = (__be16 *) (skb->data + (ihl * 4));
-            u32 sport, dport;
-
-            sport = (__force u16) hports[0];
-            dport = (__force u16) hports[1];
-            if (dport < sport)
-                swap(sport, dport);
-            ports = (sport << 16) + dport;
+            ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+            if (ports.v16[1] < ports.v16[0])
+                swap(ports.v16[0], ports.v16[1]);
+            break;
         }
-        break;
-    default:
+        ports.v32 = 0;
         break;
     }
 
     /* get a consistent hash (same value on both flow directions) */
     if (addr2 < addr1)
         swap(addr1, addr2);
 
-    skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+    skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
     if (!skb->rxhash)
         skb->rxhash = 1;
--
cgit v1.1
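The port handling above can be reproduced in a stand-alone C sketch: both 16-bit ports are fetched with one 32-bit load through a union, and ports and addresses are put into a canonical order before hashing so that both directions of a flow produce the same value. The mix3() helper below is only a placeholder for jhash_3words(), and all names are illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy mixer standing in for jhash_3words(); the kernel uses a real hash. */
static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
{
    return (a * 2654435761u) ^ (b * 2246822519u) ^ (c * 3266489917u);
}

static void swap16(uint16_t *a, uint16_t *b)
{
    uint16_t t = *a; *a = *b; *b = t;
}

/* One 32-bit load covers both 16-bit ports; putting them (and the addresses)
 * into canonical order makes both directions of a flow hash to the same value. */
static uint32_t flow_hash(uint32_t addr1, uint32_t addr2, const uint8_t *l4hdr)
{
    union { uint32_t v32; uint16_t v16[2]; } ports;

    memcpy(&ports.v32, l4hdr, 4);            /* source + destination port */
    if (ports.v16[1] < ports.v16[0])
        swap16(&ports.v16[0], &ports.v16[1]);
    if (addr2 < addr1) {
        uint32_t t = addr1; addr1 = addr2; addr2 = t;
    }
    return mix3(addr1, addr2, ports.v32);
}

int main(void)
{
    uint8_t fwd[4] = { 0x1f, 0x90, 0xc0, 0x01 };   /* sport 8080, dport 49153 */
    uint8_t rev[4] = { 0xc0, 0x01, 0x1f, 0x90 };   /* same flow, reversed */

    /* Both directions print the same hash. */
    printf("%08x\n%08x\n",
           (unsigned)flow_hash(0x0a000001, 0x0a000002, fwd),
           (unsigned)flow_hash(0x0a000002, 0x0a000001, rev));
    return 0;
}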
From a9cbd588fdb71ea415754c885e2f9f03e6bf1ba0 Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Mon, 26 Apr 2010 23:06:24 +0000
Subject: net: reimplement softnet_data.output_queue as a FIFO queue

Reimplement softnet_data.output_queue as a FIFO queue to keep fairness
among the rescheduled qdiscs.

Signed-off-by: Changli Gao
Acked-by: Eric Dumazet
----
 include/linux/netdevice.h | 1 +
 net/core/dev.c | 22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)
Signed-off-by: David S. Miller
---
 net/core/dev.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4d43f1a..3d31491 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
     local_irq_save(flags);
     sd = &__get_cpu_var(softnet_data);
-    q->next_sched = sd->output_queue;
-    sd->output_queue = q;
+    q->next_sched = NULL;
+    *sd->output_queue_tailp = q;
+    sd->output_queue_tailp = &q->next_sched;
     raise_softirq_irqoff(NET_TX_SOFTIRQ);
     local_irq_restore(flags);
 }
@@ -2529,6 +2530,7 @@ static void net_tx_action(struct softirq_action *h)
         local_irq_disable();
         head = sd->output_queue;
         sd->output_queue = NULL;
+        sd->output_queue_tailp = &sd->output_queue;
         local_irq_enable();
 
         while (head) {
@@ -5594,7 +5596,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                             void *ocpu)
 {
     struct sk_buff **list_skb;
-    struct Qdisc **list_net;
     struct sk_buff *skb;
     unsigned int cpu, oldcpu = (unsigned long)ocpu;
     struct softnet_data *sd, *oldsd;
@@ -5615,13 +5616,13 @@ static int dev_cpu_callback(struct notifier_block *nfb,
     *list_skb = oldsd->completion_queue;
     oldsd->completion_queue = NULL;
 
-    /* Find end of our output_queue. */
-    list_net = &sd->output_queue;
-    while (*list_net)
-        list_net = &(*list_net)->next_sched;
     /* Append output queue from offline CPU. */
-    *list_net = oldsd->output_queue;
-    oldsd->output_queue = NULL;
+    if (oldsd->output_queue) {
+        *sd->output_queue_tailp = oldsd->output_queue;
+        sd->output_queue_tailp = oldsd->output_queue_tailp;
+        oldsd->output_queue = NULL;
+        oldsd->output_queue_tailp = &oldsd->output_queue;
+    }
 
     raise_softirq_irqoff(NET_TX_SOFTIRQ);
     local_irq_enable();
@@ -5851,7 +5852,8 @@ static int __init net_dev_init(void)
         skb_queue_head_init(&sd->input_pkt_queue);
         sd->completion_queue = NULL;
         INIT_LIST_HEAD(&sd->poll_list);
-
+        sd->output_queue = NULL;
+        sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
         sd->csd.func = rps_trigger_softirq;
         sd->csd.info = sd;
--
cgit v1.1
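The output_queue change above is the classic singly linked FIFO built from a head pointer plus a pointer to the tail's next field, which gives O(1) append without walking the list. A stand-alone C sketch of that shape (names are illustrative, not the kernel structures):

#include <stddef.h>
#include <stdio.h>

struct item {
    struct item *next;
    int id;
};

/* Head plus a pointer to the tail's next field: O(1) FIFO append,
 * the same shape as output_queue / output_queue_tailp in the patch. */
struct fifo {
    struct item *head;
    struct item **tailp;
};

static void fifo_init(struct fifo *q)
{
    q->head = NULL;
    q->tailp = &q->head;
}

static void fifo_push(struct fifo *q, struct item *it)
{
    it->next = NULL;
    *q->tailp = it;           /* link after the current tail (or at head if empty) */
    q->tailp = &it->next;     /* the new tail's next field becomes the tail slot */
}

/* Detach everything at once, as net_tx_action() does with output_queue. */
static struct item *fifo_take_all(struct fifo *q)
{
    struct item *head = q->head;

    fifo_init(q);
    return head;
}

int main(void)
{
    struct fifo q;
    struct item a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };

    fifo_init(&q);
    fifo_push(&q, &a);
    fifo_push(&q, &b);
    fifo_push(&q, &c);
    for (struct item *it = fifo_take_all(&q); it; it = it->next)
        printf("%d\n", it->id);    /* prints 1 2 3: FIFO order preserved */
    return 0;
}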
From 6e7676c1a76aed6e957611d8d7a9e5592e23aeba Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Tue, 27 Apr 2010 15:07:33 -0700
Subject: net: batch skb dequeueing from softnet input_pkt_queue

Batch skb dequeueing from the softnet input_pkt_queue to reduce potential
lock contention when RPS is enabled.

Note: in the worst case, the number of packets held in a softnet_data may
be twice netdev_max_backlog.

Signed-off-by: Changli Gao
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 57 +++++++++++++++++++++++++++++++++------------------------
 1 file changed, 39 insertions(+), 18 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 3d31491..100dcbd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2408,12 +2408,13 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
     __get_cpu_var(netdev_rx_stat).total++;
 
     rps_lock(sd);
-    if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
-        if (sd->input_pkt_queue.qlen) {
+    if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+        if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
             __skb_queue_tail(&sd->input_pkt_queue, skb);
 #ifdef CONFIG_RPS
-            *qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+            *qtail = sd->input_queue_head +
+                    skb_queue_len(&sd->input_pkt_queue);
 #endif
             rps_unlock(sd);
             local_irq_restore(flags);
@@ -2934,13 +2935,21 @@ static void flush_backlog(void *arg)
     struct sk_buff *skb, *tmp;
 
     rps_lock(sd);
-    skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
+    skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
         if (skb->dev == dev) {
             __skb_unlink(skb, &sd->input_pkt_queue);
             kfree_skb(skb);
-            input_queue_head_incr(sd);
+            input_queue_head_add(sd, 1);
         }
+    }
     rps_unlock(sd);
+
+    skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+        if (skb->dev == dev) {
+            __skb_unlink(skb, &sd->process_queue);
+            kfree_skb(skb);
+        }
+    }
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3286,24 +3295,33 @@ static int process_backlog(struct napi_struct *napi, int quota)
     }
 #endif
     napi->weight = weight_p;
-    do {
+    local_irq_disable();
+    while (work < quota) {
         struct sk_buff *skb;
+        unsigned int qlen;
+
+        while ((skb = __skb_dequeue(&sd->process_queue))) {
+            local_irq_enable();
+            __netif_receive_skb(skb);
+            if (++work >= quota)
+                return work;
+            local_irq_disable();
+        }
 
-        local_irq_disable();
         rps_lock(sd);
-        skb = __skb_dequeue(&sd->input_pkt_queue);
-        if (!skb) {
+        qlen = skb_queue_len(&sd->input_pkt_queue);
+        if (qlen) {
+            input_queue_head_add(sd, qlen);
+            skb_queue_splice_tail_init(&sd->input_pkt_queue,
+                                       &sd->process_queue);
+        }
+        if (qlen < quota - work) {
             __napi_complete(napi);
-            rps_unlock(sd);
-            local_irq_enable();
-            break;
+            quota = work + qlen;
         }
-        input_queue_head_incr(sd);
         rps_unlock(sd);
-        local_irq_enable();
-
-        __netif_receive_skb(skb);
-    } while (++work < quota);
+    }
+    local_irq_enable();
 
     return work;
 }
@@ -5630,8 +5648,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
     /* Process offline CPU's input_pkt_queue */
     while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
         netif_rx(skb);
-        input_queue_head_incr(oldsd);
+        input_queue_head_add(oldsd, 1);
     }
+    while ((skb = __skb_dequeue(&oldsd->process_queue)))
+        netif_rx(skb);
 
     return NOTIFY_OK;
 }
@@ -5850,6 +5870,7 @@ static int __init net_dev_init(void)
         struct softnet_data *sd = &per_cpu(softnet_data, i);
 
         skb_queue_head_init(&sd->input_pkt_queue);
+        skb_queue_head_init(&sd->process_queue);
         sd->completion_queue = NULL;
         INIT_LIST_HEAD(&sd->poll_list);
         sd->output_queue = NULL;
--
cgit v1.1
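The batching above amounts to: take the lock once, splice the whole shared queue onto a private list, drop the lock, then process the private list at leisure. A user-space sketch of the same idea with a pthread mutex in place of rps_lock() (all names illustrative):

#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int id; };

struct queue {
    pthread_mutex_t lock;
    struct node *head, **tailp;
};

static void enqueue(struct queue *q, struct node *n)
{
    pthread_mutex_lock(&q->lock);
    n->next = NULL;
    *q->tailp = n;
    q->tailp = &n->next;
    pthread_mutex_unlock(&q->lock);
}

/* One lock acquisition moves the whole backlog to a private list; the
 * (possibly expensive) per-item work then runs with the lock dropped,
 * which is the point of splicing input_pkt_queue onto process_queue. */
static void process_all(struct queue *q)
{
    pthread_mutex_lock(&q->lock);
    struct node *batch = q->head;
    q->head = NULL;
    q->tailp = &q->head;
    pthread_mutex_unlock(&q->lock);

    for (struct node *n = batch; n; n = n->next)
        printf("processing %d\n", n->id);
}

int main(void)
{
    struct queue q = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
    struct node a = { .id = 1 }, b = { .id = 2 };

    q.tailp = &q.head;
    enqueue(&q, &a);
    enqueue(&q, &b);
    process_all(&q);
    return 0;
}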
From dee42870a423ad485129f43cddfe7275479f11d8 Mon Sep 17 00:00:00 2001
From: Changli Gao
Date: Sun, 2 May 2010 05:42:16 +0000
Subject: net: fix softnet_stat

The per-cpu variable softnet_data.total was shared between IRQ and SoftIRQ
context without any protection. And enqueue_to_backlog should update the
netdev_rx_stat of the target CPU.

This patch renames softnet_data.total to softnet_data.processed: the number
of packets processed in upper levels (IP stacks).

The softnet_stat data is moved into softnet_data.

Signed-off-by: Changli Gao
----
 include/linux/netdevice.h | 17 +++++++----------
 net/core/dev.c | 26 ++++++++++++--------------
 net/sched/sch_generic.c | 2 +-
 3 files changed, 20 insertions(+), 25 deletions(-)
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 100dcbd..36d53be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2205,8 +2205,6 @@ int netdev_max_backlog __read_mostly = 1000;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
-DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -2366,7 +2364,7 @@ static void rps_trigger_softirq(void *data)
     struct softnet_data *sd = data;
 
     __napi_schedule(&sd->backlog);
-    __get_cpu_var(netdev_rx_stat).received_rps++;
+    sd->received_rps++;
 }
 
 #endif /* CONFIG_RPS */
@@ -2405,7 +2403,6 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
     sd = &per_cpu(softnet_data, cpu);
 
     local_irq_save(flags);
-    __get_cpu_var(netdev_rx_stat).total++;
 
     rps_lock(sd);
     if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
@@ -2429,9 +2426,9 @@ enqueue:
         goto enqueue;
     }
 
+    sd->dropped++;
     rps_unlock(sd);
 
-    __get_cpu_var(netdev_rx_stat).dropped++;
     local_irq_restore(flags);
 
     kfree_skb(skb);
@@ -2806,7 +2803,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
             skb->dev = master;
     }
 
-    __get_cpu_var(netdev_rx_stat).total++;
+    __get_cpu_var(softnet_data).processed++;
 
     skb_reset_network_header(skb);
     skb_reset_transport_header(skb);
@@ -3490,7 +3487,7 @@ out:
     return;
 
 softnet_break:
-    __get_cpu_var(netdev_rx_stat).time_squeeze++;
+    sd->time_squeeze++;
     __raise_softirq_irqoff(NET_RX_SOFTIRQ);
     goto out;
 }
@@ -3691,17 +3688,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
     return 0;
 }
 
-static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+static struct softnet_data *softnet_get_online(loff_t *pos)
 {
-    struct netif_rx_stats *rc = NULL;
+    struct softnet_data *sd = NULL;
 
     while (*pos < nr_cpu_ids)
         if (cpu_online(*pos)) {
-            rc = &per_cpu(netdev_rx_stat, *pos);
+            sd = &per_cpu(softnet_data, *pos);
             break;
         } else
             ++*pos;
-    return rc;
+    return sd;
 }
 
 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3721,12 +3718,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
-    struct netif_rx_stats *s = v;
+    struct softnet_data *sd = v;
 
     seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-           s->total, s->dropped, s->time_squeeze, 0,
+           sd->processed, sd->dropped, sd->time_squeeze, 0,
            0, 0, 0, 0, /* was fastroute */
-           s->cpu_collision, s->received_rps);
+           sd->cpu_collision, sd->received_rps);
 
     return 0;
 }
@@ -5869,6 +5866,7 @@ static int __init net_dev_init(void)
     for_each_possible_cpu(i) {
         struct softnet_data *sd = &per_cpu(softnet_data, i);
 
+        memset(sd, 0, sizeof(*sd));
         skb_queue_head_init(&sd->input_pkt_queue);
         skb_queue_head_init(&sd->process_queue);
         sd->completion_queue = NULL;
--
cgit v1.1
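After this patch, each row of /proc/net/softnet_stat is printed by softnet_seq_show() as ten hex words: processed, dropped, time_squeeze, five always-zero placeholders, cpu_collision and received_rps. A minimal parser matching that layout is sketched below; note that later kernels may append further columns, so treat the ten-field format as specific to this era:

#include <stdio.h>

/* Parse one row of /proc/net/softnet_stat as formatted by softnet_seq_show()
 * in the patch above: ten hex words per CPU, in the order shown there. */
int main(void)
{
    const char *row = "0000a3c1 00000002 00000007 00000000 00000000 "
                      "00000000 00000000 00000000 00000001 00000014";
    unsigned int v[10];

    if (sscanf(row, "%x %x %x %x %x %x %x %x %x %x",
               &v[0], &v[1], &v[2], &v[3], &v[4],
               &v[5], &v[6], &v[7], &v[8], &v[9]) != 10)
        return 1;

    printf("processed=%u dropped=%u time_squeeze=%u cpu_collision=%u received_rps=%u\n",
           v[0], v[1], v[2], v[8], v[9]);
    return 0;
}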
From eecfd7c4e36ff532d895885971d01d049bd3e014 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 6 May 2010 22:07:48 -0700
Subject: rps: Various optimizations

Introduce a ____napi_schedule() helper for callers in irq-disabled
contexts. rps_trigger_softirq() becomes a leaf function.

Use container_of() in process_backlog() instead of accessing the per_cpu
address directly.

Use a custom inlined version of __napi_complete() in process_backlog()
to avoid one locked instruction: only the current cpu owns and manipulates
this napi, and NAPI_STATE_SCHED is the only possible flag set on the
backlog. We can use a plain write instead of clear_bit(), and we don't
need an smp_mb() memory barrier, since with RPS on the backlog is
protected by a spinlock.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 36d53be..32611c8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2205,6 +2205,14 @@ int netdev_max_backlog __read_mostly = 1000;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+                                     struct napi_struct *napi)
+{
+    list_add_tail(&napi->poll_list, &sd->poll_list);
+    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
+
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -2363,7 +2371,7 @@ static void rps_trigger_softirq(void *data)
 {
     struct softnet_data *sd = data;
 
-    __napi_schedule(&sd->backlog);
+    ____napi_schedule(sd, &sd->backlog);
     sd->received_rps++;
 }
 
@@ -2421,7 +2429,7 @@ enqueue:
         /* Schedule NAPI for backlog device */
         if (napi_schedule_prep(&sd->backlog)) {
             if (!rps_ipi_queued(sd))
-                __napi_schedule(&sd->backlog);
+                ____napi_schedule(sd, &sd->backlog);
         }
         goto enqueue;
     }
@@ -3280,7 +3288,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 static int process_backlog(struct napi_struct *napi, int quota)
 {
     int work = 0;
-    struct softnet_data *sd = &__get_cpu_var(softnet_data);
+    struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
 #ifdef CONFIG_RPS
     /* Check if we have pending ipi, its better to send them now,
@@ -3313,7 +3321,16 @@ static int process_backlog(struct napi_struct *napi, int quota)
                                        &sd->process_queue);
         }
         if (qlen < quota - work) {
-            __napi_complete(napi);
+            /*
+             * Inline a custom version of __napi_complete().
+             * only current cpu owns and manipulates this napi,
+             * and NAPI_STATE_SCHED is the only possible flag set on backlog.
+             * we can use a plain write instead of clear_bit(),
+             * and we dont need an smp_mb() memory barrier.
+             */
+            list_del(&napi->poll_list);
+            napi->state = 0;
+
             quota = work + qlen;
         }
         rps_unlock(sd);
@@ -3334,8 +3351,7 @@ void __napi_schedule(struct napi_struct *n)
     unsigned long flags;
 
     local_irq_save(flags);
-    list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
-    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+    ____napi_schedule(&__get_cpu_var(softnet_data), n);
     local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__napi_schedule);
--
cgit v1.1
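The container_of() trick used by process_backlog() above recovers the enclosing softnet_data from a pointer to its embedded backlog napi_struct, avoiding a per-cpu lookup. A stand-alone C sketch of the same idiom (struct names are simplified stand-ins for the kernel structures):

#include <stddef.h>
#include <stdio.h>

/* Minimal container_of(), as used by process_backlog() to go from the
 * embedded backlog napi_struct back to its softnet_data. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct napi { int weight; };

struct softnet {
    int cpu;
    struct napi backlog;    /* embedded member, like softnet_data.backlog */
};

static void poll(struct napi *napi)
{
    /* Recover the containing structure from the member pointer alone,
     * no per-cpu lookup needed. */
    struct softnet *sd = container_of(napi, struct softnet, backlog);

    printf("polling backlog of cpu %d\n", sd->cpu);
}

int main(void)
{
    struct softnet sd = { .cpu = 3, .backlog = { .weight = 64 } };

    poll(&sd.backlog);
    return 0;
}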