From c936e8bd1de2fa50c49e3df6fa5036bf07870b67 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Mon, 31 May 2010 16:41:09 +0200 Subject: netfilter: don't xt_jumpstack_alloc twice in xt_register_table In xt_register_table, xt_jumpstack_alloc is called first, later xt_replace_table is used. But in xt_replace_table, xt_jumpstack_alloc will be used again. Then the memory allocated by previous xt_jumpstack_alloc will be leaked. We can simply remove the previous xt_jumpstack_alloc because there aren't any users of newinfo between xt_jumpstack_alloc and xt_replace_table. Signed-off-by: Xiaotian Feng Cc: Patrick McHardy Cc: "David S. Miller" Cc: Jan Engelhardt Cc: Andrew Morton Cc: Rusty Russell Cc: Alexey Dobriyan Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 445de70..47b1e79 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -844,10 +844,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table_info *private; struct xt_table *t, *table; - ret = xt_jumpstack_alloc(newinfo); - if (ret < 0) - return ERR_PTR(ret); - /* Don't add one object to multiple lists. */ table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { -- cgit v1.1 From 7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 16:41:35 +0200 Subject: netfilter: xtables: stackptr should be percpu commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant) introduced a performance regression, because stackptr array is shared by all cpus, adding cache line ping pongs. (16 cpus share a 64 bytes cache line) Fix this using alloc_percpu() Signed-off-by: Eric Dumazet Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/netfilter/x_tables.c | 13 +++---------- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 63958f3..4b6c5ca 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6f517bd..9d2d68f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 47b1e79..e34622f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info) vfree(info->jumpstack); else kfree(info->jumpstack); - if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) - vfree(info->stackptr); - else - kfree(info->stackptr); + + free_percpu(info->stackptr); kfree(info); } @@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) unsigned int size; int cpu; - size = sizeof(unsigned int) * nr_cpu_ids; - if (size > PAGE_SIZE) - i->stackptr = vmalloc(size); - else - i->stackptr = kmalloc(size, GFP_KERNEL); + i->stackptr = alloc_percpu(unsigned int); if (i->stackptr == NULL) return -ENOMEM; - memset(i->stackptr, 0, size); size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) -- cgit v1.1 From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 23:44:05 -0700 Subject: net: sock_queue_err_skb() dont mess with sk_forward_alloc Correct sk_forward_alloc handling for error_queue would need to use a backlog of frames that softirq handler could not deliver because socket is owned by user thread. Or extend backlog processing to be able to process normal and error packets. Another possibility is to not use mem charge for error queue, this is what I implemented in this patch. Note: this reverts commit 29030374 (net: fix sk_forward_alloc corruptions), since we dont need to lock socket anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 30 ++++++++++++++++++++++++++++-- net/ipv4/udp.c | 6 ++---- net/ipv6/udp.c | 6 ++---- 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e7ac09..9f07e74 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); +static void sock_rmem_free(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); +} + +/* + * Note: We dont mem charge error packets (no sk_forward_alloc changes) + */ +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf) + return -ENOMEM; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); + return 0; +} +EXPORT_SYMBOL(sock_queue_err_skb); + void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { @@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); - bh_unlock_sock(sk); if (err) kfree_skb(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 50678f9..eec4ff4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!inet->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; - } else { - bh_lock_sock(sk); + } else ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f90..87be586 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) { - bh_lock_sock(sk); + if (np->recverr) ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.1 From 288fcee8b7aa98796d96cd5b1b2e8005639328bf Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 23:48:19 -0700 Subject: net/ipv4/tcp_input.c: fix compilation breakage when FASTRETRANS_DEBUG > 1 Commit: c720c7e8383aff1cb219bddf474ed89d850336e3 missed these. Signed-off-by: Joe Perches Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e6dafc..548d575 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2639,7 +2639,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) if (sk->sk_family == AF_INET) { printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", msg, - &inet->daddr, ntohs(inet->dport), + &inet->inet_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); @@ -2649,7 +2649,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) struct ipv6_pinfo *np = inet6_sk(sk); printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->dport), + &np->daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); -- cgit v1.1 From 3ed37a6fa70a3a63dbb257fc640facb3974bba40 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 17:23:21 +0000 Subject: net/ipv4/igmp.c: Remove unnecessary kmalloc casts Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5fff865..250cb5e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1646,8 +1646,7 @@ static int sf_setstate(struct ip_mc_list *pmc) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { - dpsf = (struct ip_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; -- cgit v1.1 From 5d55354f147bcf82b7a330bf9617b82a692b7d49 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 31 May 2010 17:23:22 +0000 Subject: net/ipv6/mcast.c: Remove unnecessary kmalloc casts Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 59f1881..7b5fb43 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1995,8 +1995,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) &psf->sf_addr)) break; if (!dpsf) { - dpsf = (struct ip6_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; -- cgit v1.1 From aac4dddc358acfd9d98b20024a42c34dfab31c39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 00:26:58 -0700 Subject: ipv6: get rid of ipip6_prl_lock As noticed by Julia Lawall, ipip6_tunnel_add_prl() incorrectly calls kzallloc(..., GFP_KERNEL) while a spinlock is held. She provided a patch to use GFP_ATOMIC instead. One possibility would be to convert this spinlock to a mutex, or preallocate the thing before taking the lock. After RCU conversion, it appears we dont need this lock, since caller already holds RTNL Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e51e650..702c532 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -249,8 +249,6 @@ failed: return NULL; } -static DEFINE_SPINLOCK(ipip6_prl_lock); - #define for_each_prl_rcu(start) \ for (prl = rcu_dereference(start); \ prl; \ @@ -340,7 +338,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) if (a->addr == htonl(INADDR_ANY)) return -EINVAL; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); for (p = t->prl; p; p = p->next) { if (p->addr == a->addr) { @@ -370,7 +368,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) t->prl_count++; rcu_assign_pointer(t->prl, p); out: - spin_unlock(&ipip6_prl_lock); return err; } @@ -397,7 +394,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) struct ip_tunnel_prl_entry *x, **p; int err = 0; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { @@ -419,7 +416,6 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) } } out: - spin_unlock(&ipip6_prl_lock); return err; } -- cgit v1.1 From 8ae5977ff95c03fe6c36a5721c57dcb4bfe4f290 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:52:58 +0200 Subject: mac80211: fix blockack-req processing Daniel reported that the paged RX changes had broken blockack request frame processing due to using data that wasn't really part of the skb data. Fix this using skb_copy_bits() for the needed data. As a side effect, this adds a check on processing too short frames, which previously this code could do. Reported-by: Daniel Halperin Signed-off-by: Johannes Berg Acked-by: Daniel Halperin Signed-off-by: John W. Linville --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bc..5e0b654 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1818,17 +1818,26 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_CONTINUE; if (ieee80211_is_back_req(bar->frame_control)) { + struct { + __le16 control, start_seq_num; + } __packed bar_data; + if (!rx->sta) return RX_DROP_MONITOR; + + if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control), + &bar_data, sizeof(bar_data))) + return RX_DROP_MONITOR; + spin_lock(&rx->sta->lock); - tid = le16_to_cpu(bar->control) >> 12; + tid = le16_to_cpu(bar_data.control) >> 12; if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { spin_unlock(&rx->sta->lock); return RX_DROP_MONITOR; } tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; - start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; + start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; /* reset session timer */ if (tid_agg_rx->timeout) -- cgit v1.1 From 51a0d38de26226f2779912d92f155b93d539da9a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 12:00:12 +0200 Subject: mac80211: fix dialog token allocator The dialog token allocator has apparently been broken since b83f4e15 ("mac80211: fix deadlock in sta->lock") because it got moved out under the spinlock. Fix it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a..98258b7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -332,14 +332,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) IEEE80211_QUEUE_STOP_REASON_AGGREGATION); spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); - /* send an addBA request */ + /* prepare tid data */ sta->ampdu_mlme.dialog_token_allocator++; sta->ampdu_mlme.tid_tx[tid]->dialog_token = sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, -- cgit v1.1 From fafeeb6c80e3842c6dc19d05de09a23f23eef0d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 10:04:49 +0000 Subject: xfrm: force a dst reference in __xfrm_route_forward() Packets going through __xfrm_route_forward() have a not refcounted dst entry, since we enabled a noref forwarding path. xfrm_lookup() might incorrectly release this dst entry. It's a bit late to make invasive changes in xfrm_lookup(), so lets force a refcount in this path. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d965a2b..4bf27d9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2153,6 +2153,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } + skb_dst_force(skb); dst = skb_dst(skb); res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; -- cgit v1.1 From bc135b23d01acf7ee926aaf75b0020c86d3869f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 03:23:51 -0700 Subject: net: Define accessors to manipulate QDISC_STATE_RUNNING Define three helpers to manipulate QDISC_STATE_RUNNIG flag, that a second patch will move on another location. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- net/sched/sch_generic.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 983a3c1..2733226 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2047,7 +2047,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, kfree_skb(skb); rc = NET_XMIT_DROP; } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && - !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { + qdisc_run_begin(q)) { /* * This is a work-conserving queue; there are no old skbs * waiting to be sent out; and the qdisc is not running - @@ -2059,7 +2059,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (sch_direct_xmit(skb, q, dev, txq, root_lock)) __qdisc_run(q); else - clear_bit(__QDISC_STATE_RUNNING, &q->state); + qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index bd1892f..37b86ea 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q) } } - clear_bit(__QDISC_STATE_RUNNING, &q->state); + qdisc_run_end(q); } unsigned long dev_trans_start(struct net_device *dev) @@ -797,7 +797,7 @@ static bool some_qdisc_is_busy(struct net_device *dev) spin_lock_bh(root_lock); - val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || + val = (qdisc_is_running(q) || test_bit(__QDISC_STATE_SCHED, &q->state)); spin_unlock_bh(root_lock); -- cgit v1.1 From 194dbcc8a1a97cbac9a619a563e5f6b7f7d5a485 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:06 +0000 Subject: net: init_vlan should not copy slave or master flags The vlan device should not copy the slave or master flags from the real device. It is not in the bond until added nor is it a master. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 55be908..5298426 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -708,7 +708,8 @@ static int vlan_dev_init(struct net_device *dev) netif_carrier_off(dev); /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | + IFF_MASTER | IFF_SLAVE); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | -- cgit v1.1 From 2df4a0fa1540c460ec69788ab2a901cc72a75644 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 12 May 2010 21:31:11 +0000 Subject: net: fix conflict between null_or_orig and null_or_bond If a skb is received on an inactive bond that does not meet the special cases checked for by skb_bond_should_drop it should only be delivered to exact matches as the comment in netif_receive_skb() says. However because null_or_bond could also be null this is not always true. This patch renames null_or_bond to orig_or_bond and initializes it to orig_dev. This keeps the intent of null_or_bond to pass frames received on VLAN interfaces stacked on bonding interfaces without invalidating the statement for null_or_orig. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1845b08..d03470f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2795,7 +2795,7 @@ static int __netif_receive_skb(struct sk_buff *skb) struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; - struct net_device *null_or_bond; + struct net_device *orig_or_bond; int ret = NET_RX_DROP; __be16 type; @@ -2868,10 +2868,10 @@ ncls: * device that may have registered for a specific ptype. The * handler may have to adjust skb->dev and orig_dev. */ - null_or_bond = NULL; + orig_or_bond = orig_dev; if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - null_or_bond = vlan_dev_real_dev(skb->dev); + orig_or_bond = vlan_dev_real_dev(skb->dev); } type = skb->protocol; @@ -2879,7 +2879,7 @@ ncls: &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (ptype->dev == null_or_orig || ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == null_or_bond)) { + ptype->dev == orig_or_bond)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.1 From 79640a4ca6955e3ebdb7038508fa7a0cd7fa5527 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 05:09:29 -0700 Subject: net: add additional lock to qdisc to increase throughput When many cpus compete for sending frames on a given qdisc, the qdisc spinlock suffers from very high contention. The cpu owning __QDISC_STATE_RUNNING bit has same priority to acquire the lock, and cannot dequeue packets fast enough, since it must wait for this lock for each dequeued packet. One solution to this problem is to force all cpus spinning on a second lock before trying to get the main lock, when/if they see __QDISC_STATE_RUNNING already set. The owning cpu then compete with at most one other cpu for the main lock, allowing for higher dequeueing rate. Based on a previous patch from Alexander Duyck. I added the heuristic to avoid the atomic in fast path, and put the new lock far away from the cache line used by the dequeue worker. Also try to release the busylock lock as late as possible. Tests with following script gave a boost from ~50.000 pps to ~600.000 pps on a dual quad core machine (E5450 @3.00GHz), tg3 driver. (A single netperf flow can reach ~800.000 pps on this platform) for j in `seq 0 3`; do for i in `seq 0 7`; do netperf -H 192.168.0.1 -t UDP_STREAM -l 60 -N -T $i -- -m 6 & done done Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/dev.c | 29 +++++++++++++++++++++++++---- net/sched/sch_generic.c | 1 + 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2733226..ffca5c10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2040,8 +2040,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); + bool contended = qdisc_is_running(q); int rc; + /* + * Heuristic to force contended enqueues to serialize on a + * separate lock before trying to get qdisc main lock. + * This permits __QDISC_STATE_RUNNING owner to get the lock more often + * and dequeue packets faster. + */ + if (unlikely(contended)) + spin_lock(&q->busylock); + spin_lock(root_lock); if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { kfree_skb(skb); @@ -2056,19 +2066,30 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); __qdisc_update_bstats(q, skb->len); - if (sch_direct_xmit(skb, q, dev, txq, root_lock)) + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } __qdisc_run(q); - else + } else qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); rc = qdisc_enqueue_root(skb, q); - qdisc_run(q); + if (qdisc_run_begin(q)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } + __qdisc_run(q); + } } spin_unlock(root_lock); - + if (unlikely(contended)) + spin_unlock(&q->busylock); return rc; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 37b86ea..d20fcd2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -561,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); + spin_lock_init(&sch->busylock); sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; -- cgit v1.1 From c2d9ba9bce8d7323ca96f239e1f505c14d6244fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Jun 2010 06:51:19 +0000 Subject: net: CONFIG_NET_NS reduction Use read_pnet() and write_pnet() to reduce number of ifdef CONFIG_NET_NS Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 6 +----- net/netfilter/nf_conntrack_core.c | 8 ++------ 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 8c4348c..f0e774c 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -53,11 +53,7 @@ static struct ip6addrlbl_table static inline struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) { -#ifdef CONFIG_NET_NS - return lbl->lbl_net; -#else - return &init_net; -#endif + return read_pnet(&lbl->lbl_net); } /* diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index eeeb8bc..7728898 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); -#ifdef CONFIG_NET_NS - ct->ct_net = net; -#endif + write_pnet(&ct->ct_net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES if (zone) { struct nf_conntrack_zone *nf_ct_zone; @@ -1363,9 +1361,7 @@ static int nf_conntrack_init_init_net(void) goto err_extend; #endif /* Set up fake conntrack: to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif + write_pnet(&nf_conntrack_untracked.ct_net, &init_net); atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); -- cgit v1.1 From 614f60fa9d73a9e8fdff3df83381907fea7c5649 Mon Sep 17 00:00:00 2001 From: Scott McMillan Date: Wed, 2 Jun 2010 05:53:56 -0700 Subject: packet_mmap: expose hw packet timestamps to network packet capture utilities This patch adds a setting, PACKET_TIMESTAMP, to specify the packet timestamp source that is exported to capture utilities like tcpdump by packet_mmap. PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING. However, only the SOF_TIMESTAMPING_SYS_HARDWARE and SOF_TIMESTAMPING_RAW_HARDWARE values are currently recognized by PACKET_TIMESTAMP. SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set. If PACKET_TIMESTAMP is not set, a software timestamp generated inside the networking stack is used (the behavior before this setting was added). Signed-off-by: Scott McMillan Signed-off-by: David S. Miller --- net/packet/af_packet.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2078a27..9a17f28 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -83,6 +83,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -202,6 +203,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; @@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timeval tv; struct timespec ts; + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -737,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + tv = ktime_to_timeval(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + tv = ktime_to_timeval(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) tv = ktime_to_timeval(skb->tstamp); else do_gettimeofday(&tv); @@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + ts = ktime_to_timespec(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + ts = ktime_to_timespec(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) ts = ktime_to_timespec(skb->tstamp); else getnstimeofday(&ts); @@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->has_vnet_hdr = !!val; return 0; } + case PACKET_TIMESTAMP: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->tp_tstamp = val; + return 0; + } default: return -ENOPROTOOPT; } @@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; data = &val; break; + case PACKET_TIMESTAMP: + if (len > sizeof(int)) + len = sizeof(int); + val = po->tp_tstamp; + data = &val; + break; default: return -ENOPROTOOPT; } -- cgit v1.1 From 33c29dde7d04dc0ec0edb649d20ccf1351c13a06 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 14:26:59 +0000 Subject: act_nat: fix the wrong checksum when addr isn't in old_addr/mask fix the wrong checksum when addr isn't in old_addr/mask For TCP and UDP packets, when addr isn't in old_addr/mask we don't do SNAT or DNAT, and we should not update layer 4 checksum. Signed-off-by: Changli Gao ---- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) Signed-off-by: David S. Miller --- net/sched/act_nat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index d885ba3..5709494 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->daddr = new_addr; csum_replace4(&iph->check, addr, new_addr); + } else if ((iph->frag_off & htons(IP_OFFSET)) || + iph->protocol != IPPROTO_ICMP) { + goto out; } ihl = iph->ihl * 4; @@ -247,6 +250,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, break; } +out: return action; drop: -- cgit v1.1 From 130c0f47fdf9c533deb1136cad5136f6cb7020f0 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 30 May 2010 17:19:53 +0000 Subject: ipconfig: send host-name in DHCP requests Normally dhclient can be configured to send the "host-name" option in DHCP requests to update the client's DNS record. However for an NFSROOT system, dhclient shall never be called (which may change the IP addr and therefore lose your root NFS mount connection). So enable updating the DNS record with kernel parameter ip=::::$HOST_NAME::dhcp Signed-off-by: Wu Fengguang Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b9d84e8..3a6e1ec 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options) memcpy(e, ic_req_params, sizeof(ic_req_params)); e += sizeof(ic_req_params); + if (ic_host_name_set) { + *e++ = 12; /* host-name */ + len = strlen(utsname()->nodename); + *e++ = len; + memcpy(e, utsname()->nodename, len); + e += len; + } if (*vendor_class_identifier) { printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", vendor_class_identifier); -- cgit v1.1 From 20c59de2e6b6bc74bbf714dcd4e720afe8d516cf Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Tue, 1 Jun 2010 21:35:01 +0000 Subject: ipv6: Refactor update of IPv6 flowi destination address for srcrt (RH) option There are more than a dozen occurrences of following code in the IPv6 stack: if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; ipv6_addr_copy(&final, &fl.fl6_dst); ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; } Replace those with a helper. Note that the helper overrides final_p in all cases. This is ok as final_p was previously initialized to NULL when declared. Signed-off-by: Arnaud Ebalard Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 30 ++++++------------------------ net/ipv6/af_inet6.c | 9 ++------- net/ipv6/datagram.c | 18 ++++-------------- net/ipv6/exthdrs.c | 24 ++++++++++++++++++++++++ net/ipv6/inet6_connection_sock.c | 9 ++------- net/ipv6/raw.c | 10 ++-------- net/ipv6/syncookies.c | 9 ++------- net/ipv6/tcp_ipv6.c | 27 ++++++--------------------- net/ipv6/udp.c | 11 +++-------- 9 files changed, 51 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 0916988..6e3f325 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; int err = -1; struct dst_entry *dst; @@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, opt = np->opt; - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; @@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt != NULL && np->opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e733942d..94b1b9c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); @@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7126846..7d929a2 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; struct flowi fl; struct ip6_flowlabel *flowlabel = NULL; + struct ipv6_txoptions *opt; int addr_type; int err; @@ -155,19 +156,8 @@ ipv4_connected: security_sk_classify_flow(sk, &fl); - if (flowlabel) { - if (flowlabel->opt && flowlabel->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - } else if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + opt = flowlabel ? flowlabel->opt : np->opt; + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8a659f9..853a633 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -874,3 +874,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } +/** + * fl6_update_dst - update flowi destination address with info given + * by srcrt option, if any. + * + * @fl: flowi for which fl6_dst is to be updated + * @opt: struct ipv6_txoptions in which to look for srcrt opt + * @orig: copy of original fl6_dst address if modified + * + * Returns NULL if no txoptions or no srcrt, otherwise returns orig + * and initial value of fl->fl6_dst set in orig + */ +struct in6_addr *fl6_update_dst(struct flowi *fl, + const struct ipv6_txoptions *opt, + struct in6_addr *orig) +{ + if (!opt || !opt->srcrt) + return NULL; + + ipv6_addr_copy(orig, &fl->fl6_dst); + ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); + return orig; +} + +EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c5e3c3..8a16280 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; memset(&fl, 0, sizeof(fl)); fl.proto = sk->sk_protocol; @@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb) fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4a4dcbe..864eb8e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -725,7 +725,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, { struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_sock *rp = raw6_sk(sk); @@ -847,13 +847,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 34d1f06..1238370 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -240,17 +240,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) * me if there is a preferred way. */ { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b7c3a1..e487080 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); security_sk_classify_flow(sk, &fl); @@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr * final_p = NULL, final; + struct in6_addr * final_p, final; struct flowi fl; struct dst_entry *dst; int err = -1; @@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, security_req_classify_flow(req, &fl); opt = np->opt; - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -1392,18 +1382,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f90..4aea57d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -929,7 +929,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi fl; @@ -1099,14 +1099,9 @@ do_udp_sendmsg: ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl_ip_sport = inet->inet_sport; - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; + final_p = fl6_update_dst(&fl, opt, &final); + if (final_p) connected = 0; - } if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { fl.oif = np->mcast_oif; -- cgit v1.1 From ab95bfe01f9872459c8678572ccadbf646badad0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 1 Jun 2010 21:52:08 +0000 Subject: net: replace hooks in __netif_receive_skb V5 What this patch does is it removes two receive frame hooks (for bridge and for macvlan) from __netif_receive_skb. These are replaced them with a single hook for both. It only supports one hook per device because it makes no sense to do bridging and macvlan on the same device. Then a network driver (of virtual netdev like macvlan or bridge) can register an rx_handler for needed net device. Signed-off-by: Jiri Pirko Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br.c | 2 - net/bridge/br_if.c | 8 ++++ net/bridge/br_input.c | 12 +++-- net/bridge/br_private.h | 3 +- net/core/dev.c | 119 ++++++++++++++++++++++-------------------------- 5 files changed, 73 insertions(+), 71 deletions(-) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index 76357b5..c8436fa 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -63,7 +63,6 @@ static int __init br_init(void) goto err_out4; brioctl_set(br_ioctl_deviceless_stub); - br_handle_frame_hook = br_handle_frame; #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) br_fdb_test_addr_hook = br_fdb_test_addr; @@ -100,7 +99,6 @@ static void __exit br_deinit(void) br_fdb_test_addr_hook = NULL; #endif - br_handle_frame_hook = NULL; br_fdb_fini(); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 18b245e..d924234 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_port *p) list_del_rcu(&p->list); + netdev_rx_handler_unregister(dev); rcu_assign_pointer(dev->br_port, NULL); br_multicast_del_port(p); @@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) goto err2; rcu_assign_pointer(dev->br_port, p); + + err = netdev_rx_handler_register(dev, br_handle_frame); + if (err) + goto err3; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) br_netpoll_enable(br, dev); return 0; +err3: + rcu_assign_pointer(dev->br_port, NULL); err2: br_fdb_delete_by_port(br, p, 1); err1: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d36e700..99647d8 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest) } /* - * Called via br_handle_frame_hook. * Return NULL if skb is handled - * note: already called with rcu_read_lock (preempt_disabled) + * note: already called with rcu_read_lock (preempt_disabled) from + * netif_receive_skb */ -struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) +struct sk_buff *br_handle_frame(struct sk_buff *skb) { + struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; int (*rhook)(struct sk_buff *skb); + if (skb->pkt_type == PACKET_LOOPBACK) + return skb; + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; @@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!skb) return NULL; + p = rcu_dereference(skb->dev->br_port); + if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0f4a74b..c83519b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); -extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, - struct sk_buff *skb); +extern struct sk_buff *br_handle_frame(struct sk_buff *skb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/net/core/dev.c b/net/core/dev.c index ffca5c10..ec01a59 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2604,70 +2604,14 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } -#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) - -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ + (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -/* - * If bridge module is loaded call bridging hook. - * returns NULL if packet was consumed. - */ -struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(br_handle_frame_hook); - -static inline struct sk_buff *handle_bridge(struct sk_buff *skb, - struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) -{ - struct net_bridge_port *port; - - if (skb->pkt_type == PACKET_LOOPBACK || - (port = rcu_dereference(skb->dev->br_port)) == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - - return br_handle_frame_hook(port, skb); -} -#else -#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) -struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); - -static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, - struct net_device *orig_dev) -{ - struct macvlan_port *port; - - port = rcu_dereference(skb->dev->macvlan_port); - if (!port) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - return macvlan_handle_frame_hook(port, skb); -} -#else -#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) -#endif - #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -2763,6 +2707,47 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } +/** + * netdev_rx_handler_register - register receive handler + * @dev: device to register a handler for + * @rx_handler: receive handler to register + * + * Register a receive hander for a device. This handler will then be + * called from __netif_receive_skb. A negative errno code is returned + * on a failure. + * + * The caller must hold the rtnl_mutex. + */ +int netdev_rx_handler_register(struct net_device *dev, + rx_handler_func_t *rx_handler) +{ + ASSERT_RTNL(); + + if (dev->rx_handler) + return -EBUSY; + + rcu_assign_pointer(dev->rx_handler, rx_handler); + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_register); + +/** + * netdev_rx_handler_unregister - unregister receive handler + * @dev: device to unregister a handler from + * + * Unregister a receive hander from a device. + * + * The caller must hold the rtnl_mutex. + */ +void netdev_rx_handler_unregister(struct net_device *dev) +{ + + ASSERT_RTNL(); + rcu_assign_pointer(dev->rx_handler, NULL); +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); + static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, struct net_device *master) { @@ -2815,6 +2800,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; @@ -2877,12 +2863,17 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; + /* Handle special case of bridge or macvlan */ + rx_handler = rcu_dereference(skb->dev->rx_handler); + if (rx_handler) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + skb = rx_handler(skb); + if (!skb) + goto out; + } /* * Make sure frames received on VLAN interfaces stacked on -- cgit v1.1 From edafe502404f3669d364b6e96d79b54067b634b4 Mon Sep 17 00:00:00 2001 From: Daniele Lacamera Date: Wed, 2 Jun 2010 02:02:04 +0000 Subject: TCP: tcp_hybla: Fix integer overflow in slow start increment For large values of rtt, 2^rho operation may overflow u32. Clamp down the increment to 2^16. Signed-off-by: Daniele Lacamera Signed-off-by: David S. Miller --- net/ipv4/tcp_hybla.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index c209e05..377bc93 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * calculate 2^fract in a <<7 value. */ is_slowstart = 1; - increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) - - 128; + increment = ((1 << min(ca->rho, 16U)) * + hybla_fraction(rho_fractions)) - 128; } else { /* * congestion avoidance -- cgit v1.1 From fbc2e7d9cf49e0bf89b9e91fd60a06851a855c5d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 07:32:42 -0700 Subject: cls_u32: use skb_header_pointer() to dereference data safely use skb_header_pointer() to dereference data safely the original skb->data dereference isn't safe, as there isn't any skb->len or skb_is_nonlinear() check. skb_header_pointer() is used instead in this patch. And when the skb isn't long enough, we terminate the function u32_classify() immediately with -1. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 9627542..4f52214 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re { struct { struct tc_u_knode *knode; - u8 *ptr; + unsigned int off; } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb_network_header(skb); + unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -134,8 +134,14 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - - if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { + unsigned int toff; + __be32 *data, _data; + + toff = off + key->off + (off2 & key->offmask); + data = skb_header_pointer(skb, toff, 4, &_data); + if (!data) + goto out; + if ((*data ^ key->val) & key->mask) { n = n->next; goto next_knode; } @@ -174,29 +180,45 @@ check_terminal: if (sdepth >= TC_U32_MAXDEPTH) goto deadloop; stack[sdepth].knode = n; - stack[sdepth].ptr = ptr; + stack[sdepth].off = off; sdepth++; ht = n->ht_down; sel = 0; - if (ht->divisor) - sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); - + if (ht->divisor) { + __be32 *data, _data; + + data = skb_header_pointer(skb, off + n->sel.hoff, 4, + &_data); + if (!data) + goto out; + sel = ht->divisor & u32_hash_fold(*data, &n->sel, + n->fshift); + } if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) goto next_ht; if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; - if (n->sel.flags&TC_U32_VAROFFSET) - off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; + if (n->sel.flags & TC_U32_VAROFFSET) { + __be16 *data, _data; + + data = skb_header_pointer(skb, + off + n->sel.offoff, + 2, &_data); + if (!data) + goto out; + off2 += ntohs(n->sel.offmask & *data) >> + n->sel.offshift; + } off2 &= ~3; } if (n->sel.flags&TC_U32_EAT) { - ptr += off2; + off += off2; off2 = 0; } - if (ptr < skb_tail_pointer(skb)) + if (off < skb->len) goto next_ht; } @@ -204,9 +226,10 @@ check_terminal: if (sdepth--) { n = stack[sdepth].knode; ht = n->ht_up; - ptr = stack[sdepth].ptr; + off = stack[sdepth].off; goto check_terminal; } +out: return -1; deadloop: -- cgit v1.1 From ec8aa669b8393b6789b1954d587c63264af7ff99 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 13 May 2010 16:48:03 +0200 Subject: mac80211: add the minstrel_ht rate control algorithm Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/Kconfig | 7 + net/mac80211/Makefile | 4 + net/mac80211/main.c | 7 + net/mac80211/rate.h | 13 + net/mac80211/rc80211_minstrel_ht.c | 824 +++++++++++++++++++++++++++++ net/mac80211/rc80211_minstrel_ht.h | 128 +++++ net/mac80211/rc80211_minstrel_ht_debugfs.c | 120 +++++ 7 files changed, 1103 insertions(+) create mode 100644 net/mac80211/rc80211_minstrel_ht.c create mode 100644 net/mac80211/rc80211_minstrel_ht.h create mode 100644 net/mac80211/rc80211_minstrel_ht_debugfs.c (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 8a91f6c..83eec7a 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -33,6 +33,13 @@ config MAC80211_RC_MINSTREL ---help--- This option enables the 'minstrel' TX rate control algorithm +config MAC80211_RC_MINSTREL_HT + bool "Minstrel 802.11n support" if EMBEDDED + depends on MAC80211_RC_MINSTREL + default y + ---help--- + This option enables the 'minstrel_ht' TX rate control algorithm + choice prompt "Default rate control algorithm" depends on MAC80211_HAS_RC diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 84b48ba..fdb54e6 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -51,7 +51,11 @@ rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o rc80211_minstrel-y := rc80211_minstrel.o rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o +rc80211_minstrel_ht-y := rc80211_minstrel_ht.o +rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o + mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) +mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 22a384d..c8548e61 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -704,6 +704,10 @@ static int __init ieee80211_init(void) if (ret) return ret; + ret = rc80211_minstrel_ht_init(); + if (ret) + goto err_minstrel; + ret = rc80211_pid_init(); if (ret) goto err_pid; @@ -716,6 +720,8 @@ static int __init ieee80211_init(void) err_netdev: rc80211_pid_exit(); err_pid: + rc80211_minstrel_ht_exit(); + err_minstrel: rc80211_minstrel_exit(); return ret; @@ -724,6 +730,7 @@ static int __init ieee80211_init(void) static void __exit ieee80211_exit(void) { rc80211_pid_exit(); + rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); /* diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 065a9619..168427b 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -147,5 +147,18 @@ static inline void rc80211_minstrel_exit(void) } #endif +#ifdef CONFIG_MAC80211_RC_MINSTREL_HT +extern int rc80211_minstrel_ht_init(void); +extern void rc80211_minstrel_ht_exit(void); +#else +static inline int rc80211_minstrel_ht_init(void) +{ + return 0; +} +static inline void rc80211_minstrel_ht_exit(void) +{ +} +#endif + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c new file mode 100644 index 0000000..c23f082 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -0,0 +1,824 @@ +/* + * Copyright (C) 2010 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include "rate.h" +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +#define AVG_PKT_SIZE 1200 +#define SAMPLE_COLUMNS 10 +#define EWMA_LEVEL 75 + +/* Number of bits for an average sized packet */ +#define MCS_NBITS (AVG_PKT_SIZE << 3) + +/* Number of symbols for a packet with (bps) bits per symbol */ +#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) + +/* Transmission time for a packet containing (syms) symbols */ +#define MCS_SYMBOL_TIME(sgi, syms) \ + (sgi ? \ + ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \ + (syms) << 2 /* syms * 4 us */ \ + ) + +/* Transmit duration for the raw data part of an average sized packet */ +#define MCS_DURATION(streams, sgi, bps) MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) + +/* MCS rate information for an MCS group */ +#define MCS_GROUP(_streams, _sgi, _ht40) { \ + .streams = _streams, \ + .flags = \ + (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ + (_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ + .duration = { \ + MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) \ + } \ +} + +/* + * To enable sufficiently targeted rate sampling, MCS rates are divided into + * groups, based on the number of streams and flags (HT40, SGI) that they + * use. + */ +const struct mcs_group minstrel_mcs_groups[] = { + MCS_GROUP(1, 0, 0), + MCS_GROUP(2, 0, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 0), +#endif + + MCS_GROUP(1, 1, 0), + MCS_GROUP(2, 1, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 0), +#endif + + MCS_GROUP(1, 0, 1), + MCS_GROUP(2, 0, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 1), +#endif + + MCS_GROUP(1, 1, 1), + MCS_GROUP(2, 1, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 1), +#endif +}; + +static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; + +/* + * Perform EWMA (Exponentially Weighted Moving Average) calculation + */ +static int +minstrel_ewma(int old, int new, int weight) +{ + return (new * (100 - weight) + old * weight) / 100; +} + +/* + * Look up an MCS group index based on mac80211 rate information + */ +static int +minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) +{ + int streams = (rate->idx / MCS_GROUP_RATES) + 1; + u32 flags = IEEE80211_TX_RC_SHORT_GI | IEEE80211_TX_RC_40_MHZ_WIDTH; + int i; + + for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { + if (minstrel_mcs_groups[i].streams != streams) + continue; + if (minstrel_mcs_groups[i].flags != (rate->flags & flags)) + continue; + + return i; + } + + WARN_ON(1); + return 0; +} + +static inline struct minstrel_rate_stats * +minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) +{ + return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; +} + + +/* + * Recalculate success probabilities and counters for a rate using EWMA + */ +static void +minstrel_calc_rate_ewma(struct minstrel_priv *mp, struct minstrel_rate_stats *mr) +{ + if (unlikely(mr->attempts > 0)) { + mr->sample_skipped = 0; + mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); + if (!mr->att_hist) + mr->probability = mr->cur_prob; + else + mr->probability = minstrel_ewma(mr->probability, + mr->cur_prob, EWMA_LEVEL); + mr->att_hist += mr->attempts; + mr->succ_hist += mr->success; + } else { + mr->sample_skipped++; + } + mr->last_success = mr->success; + mr->last_attempts = mr->attempts; + mr->success = 0; + mr->attempts = 0; +} + +/* + * Calculate throughput based on the average A-MPDU length, taking into account + * the expected number of retransmissions and their expected length + */ +static void +minstrel_ht_calc_tp(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int group, int rate) +{ + struct minstrel_rate_stats *mr; + unsigned int usecs; + + mr = &mi->groups[group].rates[rate]; + + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->cur_tp = 0; + return; + } + + usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + usecs += minstrel_mcs_groups[group].duration[rate]; + mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); +} + +/* + * Update rate statistics and select new primary rates + * + * Rules for rate selection: + * - max_prob_rate must use only one stream, as a tradeoff between delivery + * probability and throughput during strong fluctuations + * - as long as the max prob rate has a probability of more than 3/4, pick + * higher throughput rates, even if the probablity is a bit lower + */ +static void +minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + struct minstrel_rate_stats *mr; + int cur_prob, cur_prob_tp, cur_tp, cur_tp2; + int group, i, index; + + if (mi->ampdu_packets > 0) { + mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, + MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); + mi->ampdu_len = 0; + mi->ampdu_packets = 0; + } + + mi->sample_slow = 0; + mi->sample_count = 0; + mi->max_tp_rate = 0; + mi->max_tp_rate2 = 0; + mi->max_prob_rate = 0; + + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mg->max_tp_rate = 0; + mg->max_tp_rate2 = 0; + mg->max_prob_rate = 0; + mi->sample_count++; + + for (i = 0; i < MCS_GROUP_RATES; i++) { + if (!(mg->supported & BIT(i))) + continue; + + mr = &mg->rates[i]; + mr->retry_updated = false; + index = MCS_GROUP_RATES * group + i; + minstrel_calc_rate_ewma(mp, mr); + minstrel_ht_calc_tp(mp, mi, group, i); + + if (!mr->cur_tp) + continue; + + /* ignore the lowest rate of each single-stream group */ + if (!i && minstrel_mcs_groups[group].streams == 1) + continue; + + if ((mr->cur_tp > cur_prob_tp && mr->probability > + MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) { + mg->max_prob_rate = index; + cur_prob = mr->probability; + } + + if (mr->cur_tp > cur_tp) { + swap(index, mg->max_tp_rate); + cur_tp = mr->cur_tp; + mr = minstrel_get_ratestats(mi, index); + } + + if (index >= mg->max_tp_rate) + continue; + + if (mr->cur_tp > cur_tp2) { + mg->max_tp_rate2 = index; + cur_tp2 = mr->cur_tp; + } + } + } + + /* try to sample up to half of the availble rates during each interval */ + mi->sample_count *= 4; + + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mr = minstrel_get_ratestats(mi, mg->max_prob_rate); + if (cur_prob_tp < mr->cur_tp && + minstrel_mcs_groups[group].streams == 1) { + mi->max_prob_rate = mg->max_prob_rate; + cur_prob = mr->cur_prob; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate); + if (cur_tp < mr->cur_tp) { + mi->max_tp_rate = mg->max_tp_rate; + cur_tp = mr->cur_tp; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); + if (cur_tp2 < mr->cur_tp) { + mi->max_tp_rate2 = mg->max_tp_rate2; + cur_tp2 = mr->cur_tp; + } + } + + mi->stats_update = jiffies; +} + +static bool +minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) +{ + if (!rate->count) + return false; + + if (rate->idx < 0) + return false; + + return !!(rate->flags & IEEE80211_TX_RC_MCS); +} + +static void +minstrel_next_sample_idx(struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + + for (;;) { + mi->sample_group++; + mi->sample_group %= ARRAY_SIZE(minstrel_mcs_groups); + mg = &mi->groups[mi->sample_group]; + + if (!mg->supported) + continue; + + if (++mg->index >= MCS_GROUP_RATES) { + mg->index = 0; + if (++mg->column >= ARRAY_SIZE(sample_table)) + mg->column = 0; + } + break; + } +} + +static void +minstrel_downgrade_rate(struct minstrel_ht_sta *mi, int *idx, bool primary) +{ + int group, orig_group; + + orig_group = group = *idx / MCS_GROUP_RATES; + while (group > 0) { + group--; + + if (!mi->groups[group].supported) + continue; + + if (minstrel_mcs_groups[group].streams > + minstrel_mcs_groups[orig_group].streams) + continue; + + if (primary) + *idx = mi->groups[group].max_tp_rate; + else + *idx = mi->groups[group].max_tp_rate2; + break; + } +} + +static void +minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + u16 tid; + + if (unlikely(!ieee80211_is_data_qos(hdr->frame_control))) + return; + + if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE))) + return; + + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_state_tx[tid] != HT_AGG_STATE_IDLE)) + return; + + ieee80211_start_tx_ba_session(pubsta, tid); +} + +static void +minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_rate_stats *rate, *rate2; + struct minstrel_priv *mp = priv; + bool last = false; + int group; + int i = 0; + + if (!msp->is_ht) + return mac80211_minstrel.tx_status(priv, sband, sta, &msp->legacy, skb); + + /* This packet was aggregated but doesn't carry status info */ + if ((info->flags & IEEE80211_TX_CTL_AMPDU) && + !(info->flags & IEEE80211_TX_STAT_AMPDU)) + return; + + if (!info->status.ampdu_len) { + info->status.ampdu_ack_len = 1; + info->status.ampdu_len = 1; + } + + mi->ampdu_packets++; + mi->ampdu_len += info->status.ampdu_len; + + if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { + mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); + mi->sample_tries = 3; + mi->sample_count--; + } + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { + mi->sample_packets += info->status.ampdu_len; + minstrel_next_sample_idx(mi); + } + + for (i = 0; !last; i++) { + last = (i == IEEE80211_TX_MAX_RATES - 1) || + !minstrel_ht_txstat_valid(&ar[i + 1]); + + if (!minstrel_ht_txstat_valid(&ar[i])) + break; + + group = minstrel_ht_get_group_idx(&ar[i]); + rate = &mi->groups[group].rates[ar[i].idx % 8]; + + if (last && (info->flags & IEEE80211_TX_STAT_ACK)) + rate->success += info->status.ampdu_ack_len; + + rate->attempts += ar[i].count * info->status.ampdu_len; + } + + /* + * check for sudden death of spatial multiplexing, + * downgrade to a lower number of streams if necessary. + */ + rate = minstrel_get_ratestats(mi, mi->max_tp_rate); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate, true); + + rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false); + + if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { + minstrel_ht_update_stats(mp, mi); + minstrel_aggr_check(mp, sta, skb); + } +} + +static void +minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int index) +{ + struct minstrel_rate_stats *mr; + const struct mcs_group *group; + unsigned int tx_time, tx_time_rtscts, tx_time_data; + unsigned int cw = mp->cw_min; + unsigned int t_slot = 9; /* FIXME */ + unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + + mr = minstrel_get_ratestats(mi, index); + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->retry_count = 1; + mr->retry_count_rtscts = 1; + return; + } + + mr->retry_count = 2; + mr->retry_count_rtscts = 2; + mr->retry_updated = true; + + group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; + tx_time = 2 * (t_slot + mi->overhead + tx_time_data); + tx_time_rtscts = 2 * (t_slot + mi->overhead_rtscts + tx_time_data); + do { + cw = (cw << 1) | 1; + cw = min(cw, mp->cw_max); + tx_time += cw + t_slot + mi->overhead; + tx_time_rtscts += cw + t_slot + mi->overhead_rtscts; + if (tx_time_rtscts < mp->segment_size) + mr->retry_count_rtscts++; + } while ((tx_time < mp->segment_size) && + (++mr->retry_count < mp->max_retry)); +} + + +static void +minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate, int index, + struct ieee80211_tx_rate_control *txrc, + bool sample, bool rtscts) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + struct minstrel_rate_stats *mr; + + mr = minstrel_get_ratestats(mi, index); + if (!mr->retry_updated) + minstrel_calc_retransmit(mp, mi, index); + + if (mr->probability < MINSTREL_FRAC(20, 100)) + rate->count = 2; + else if (rtscts) + rate->count = mr->retry_count_rtscts; + else + rate->count = mr->retry_count; + + rate->flags = IEEE80211_TX_RC_MCS | group->flags; + if (txrc->short_preamble) + rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + if (txrc->rts || rtscts) + rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; + rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; +} + +static inline int +minstrel_get_duration(int index) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + return group->duration[index % MCS_GROUP_RATES]; +} + +static int +minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_rate_stats *mr; + struct minstrel_mcs_group_data *mg; + int sample_idx = 0; + + if (mi->sample_wait > 0) { + mi->sample_wait--; + return -1; + } + + if (!mi->sample_tries) + return -1; + + mi->sample_tries--; + mg = &mi->groups[mi->sample_group]; + sample_idx = sample_table[mg->column][mg->index]; + mr = &mg->rates[sample_idx]; + sample_idx += mi->sample_group * MCS_GROUP_RATES; + + /* + * When not using MRR, do not sample if the probability is already + * higher than 95% to avoid wasting airtime + */ + if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) + goto next; + + /* + * Make sure that lower rates get sampled only occasionally, + * if the link is working perfectly. + */ + if (minstrel_get_duration(sample_idx) > + minstrel_get_duration(mi->max_tp_rate)) { + if (mr->sample_skipped < 10) + goto next; + + if (mi->sample_slow++ > 2) + goto next; + } + + return sample_idx; + +next: + minstrel_next_sample_idx(mi); + return -1; +} + +static void +minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_priv *mp = priv; + int sample_idx; + + if (rate_control_send_low(sta, priv_sta, txrc)) + return; + + if (!msp->is_ht) + return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); + + info->flags |= mi->tx_flags; + sample_idx = minstrel_get_sample_rate(mp, mi); + if (sample_idx >= 0) { + minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, + txrc, true, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, + txrc, false, true); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } else { + minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, + txrc, false, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, + txrc, false, true); + } + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true); + + ar[3].count = 0; + ar[3].idx = -1; + + mi->total_packets++; + + /* wraparound */ + if (mi->total_packets == ~0) { + mi->total_packets = 0; + mi->sample_packets = 0; + } +} + +static void +minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + enum nl80211_channel_type oper_chan_type) +{ + struct minstrel_priv *mp = priv; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; + struct ieee80211_local *local = hw_to_local(mp->hw); + u16 sta_cap = sta->ht_cap.cap; + int ack_dur; + int stbc; + int i; + + /* fall back to the old minstrel for legacy stations */ + if (sta && !sta->ht_cap.ht_supported) { + msp->is_ht = false; + memset(&msp->legacy, 0, sizeof(msp->legacy)); + msp->legacy.r = msp->ratelist; + msp->legacy.sample_table = msp->sample_table; + return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); + } + + BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != + MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); + + msp->is_ht = true; + memset(mi, 0, sizeof(*mi)); + mi->stats_update = jiffies; + + ack_dur = ieee80211_frame_duration(local, 10, 60, 1, 1); + mi->overhead = ieee80211_frame_duration(local, 0, 60, 1, 1) + ack_dur; + mi->overhead_rtscts = mi->overhead + 2 * ack_dur; + + mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); + + /* When using MRR, sample more on the first attempt, without delay */ + if (mp->has_mrr) { + mi->sample_count = 16; + mi->sample_wait = 0; + } else { + mi->sample_count = 8; + mi->sample_wait = 8; + } + mi->sample_tries = 4; + + stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >> + IEEE80211_HT_CAP_RX_STBC_SHIFT; + mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT; + + if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING) + mi->tx_flags |= IEEE80211_TX_CTL_LDPC; + + if (oper_chan_type != NL80211_CHAN_HT40MINUS && + oper_chan_type != NL80211_CHAN_HT40PLUS) + sta_cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { + u16 req = 0; + + mi->groups[i].supported = 0; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SGI_40; + else + req |= IEEE80211_HT_CAP_SGI_20; + } + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + if ((sta_cap & req) != req) + continue; + + mi->groups[i].supported = + mcs->rx_mask[minstrel_mcs_groups[i].streams - 1]; + } +} + +static void +minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_priv *mp = priv; + + minstrel_ht_update_caps(priv, sband, sta, priv_sta, mp->hw->conf.channel_type); +} + +static void +minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + u32 changed, enum nl80211_channel_type oper_chan_type) +{ + minstrel_ht_update_caps(priv, sband, sta, priv_sta, oper_chan_type); +} + +static void * +minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) +{ + struct ieee80211_supported_band *sband; + struct minstrel_ht_sta_priv *msp; + struct minstrel_priv *mp = priv; + struct ieee80211_hw *hw = mp->hw; + int max_rates = 0; + int i; + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp); + if (!msp) + return NULL; + + msp->ratelist = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); + if (!msp->ratelist) + goto error; + + msp->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); + if (!msp->sample_table) + goto error1; + + return msp; + +error1: + kfree(msp->sample_table); +error: + kfree(msp); + return NULL; +} + +static void +minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + kfree(msp->sample_table); + kfree(msp->ratelist); + kfree(msp); +} + +static void * +minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +{ + return mac80211_minstrel.alloc(hw, debugfsdir); +} + +static void +minstrel_ht_free(void *priv) +{ + mac80211_minstrel.free(priv); +} + +static struct rate_control_ops mac80211_minstrel_ht = { + .name = "minstrel_ht", + .tx_status = minstrel_ht_tx_status, + .get_rate = minstrel_ht_get_rate, + .rate_init = minstrel_ht_rate_init, + .rate_update = minstrel_ht_rate_update, + .alloc_sta = minstrel_ht_alloc_sta, + .free_sta = minstrel_ht_free_sta, + .alloc = minstrel_ht_alloc, + .free = minstrel_ht_free, +#ifdef CONFIG_MAC80211_DEBUGFS + .add_sta_debugfs = minstrel_ht_add_sta_debugfs, + .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, +#endif +}; + + +static void +init_sample_table(void) +{ + int col, i, new_idx; + u8 rnd[MCS_GROUP_RATES]; + + memset(sample_table, 0xff, sizeof(sample_table)); + for (col = 0; col < SAMPLE_COLUMNS; col++) { + for (i = 0; i < MCS_GROUP_RATES; i++) { + get_random_bytes(rnd, sizeof(rnd)); + new_idx = (i + rnd[i]) % MCS_GROUP_RATES; + + while (sample_table[col][new_idx] != 0xff) + new_idx = (new_idx + 1) % MCS_GROUP_RATES; + + sample_table[col][new_idx] = i; + } + } +} + +int __init +rc80211_minstrel_ht_init(void) +{ + init_sample_table(); + return ieee80211_rate_control_register(&mac80211_minstrel_ht); +} + +void +rc80211_minstrel_ht_exit(void) +{ + ieee80211_rate_control_unregister(&mac80211_minstrel_ht); +} diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h new file mode 100644 index 0000000..696c0fc --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2010 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __RC_MINSTREL_HT_H +#define __RC_MINSTREL_HT_H + +/* + * The number of streams can be changed to 2 to reduce code + * size and memory footprint. + */ +#define MINSTREL_MAX_STREAMS 3 +#define MINSTREL_STREAM_GROUPS 4 + +/* scaled fraction values */ +#define MINSTREL_SCALE 16 +#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) +#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) + +#define MCS_GROUP_RATES 8 + +struct mcs_group { + u32 flags; + unsigned int streams; + unsigned int duration[MCS_GROUP_RATES]; +}; + +struct minstrel_rate_stats { + /* current / last sampling period attempts/success counters */ + unsigned int attempts, last_attempts; + unsigned int success, last_success; + + /* total attempts/success counters */ + u64 att_hist, succ_hist; + + /* current throughput */ + unsigned int cur_tp; + + /* packet delivery probabilities */ + unsigned int cur_prob, probability; + + /* maximum retry counts */ + unsigned int retry_count; + unsigned int retry_count_rtscts; + + bool retry_updated; + u8 sample_skipped; +}; + +struct minstrel_mcs_group_data { + u8 index; + u8 column; + + /* bitfield of supported MCS rates of this group */ + u8 supported; + + /* selected primary rates */ + unsigned int max_tp_rate; + unsigned int max_tp_rate2; + unsigned int max_prob_rate; + + /* MCS rate statistics */ + struct minstrel_rate_stats rates[MCS_GROUP_RATES]; +}; + +struct minstrel_ht_sta { + /* ampdu length (average, per sampling interval) */ + unsigned int ampdu_len; + unsigned int ampdu_packets; + + /* ampdu length (EWMA) */ + unsigned int avg_ampdu_len; + + /* best throughput rate */ + unsigned int max_tp_rate; + + /* second best throughput rate */ + unsigned int max_tp_rate2; + + /* best probability rate */ + unsigned int max_prob_rate; + + /* time of last status update */ + unsigned long stats_update; + + /* overhead time in usec for each frame */ + unsigned int overhead; + unsigned int overhead_rtscts; + + unsigned int total_packets; + unsigned int sample_packets; + + /* tx flags to add for frames for this sta */ + u32 tx_flags; + + u8 sample_wait; + u8 sample_tries; + u8 sample_count; + u8 sample_slow; + + /* current MCS group to be sampled */ + u8 sample_group; + + /* MCS rate group info and statistics */ + struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS]; +}; + +struct minstrel_ht_sta_priv { + union { + struct minstrel_ht_sta ht; + struct minstrel_sta_info legacy; + }; +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *dbg_stats; +#endif + void *ratelist; + void *sample_table; + bool is_ht; +}; + +void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); +void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta); + +#endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c new file mode 100644 index 0000000..4fb3ccb --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2010 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +extern const struct mcs_group minstrel_mcs_groups[]; + +static int +minstrel_ht_stats_open(struct inode *inode, struct file *file) +{ + struct minstrel_ht_sta_priv *msp = inode->i_private; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_debugfs_info *ms; + unsigned int i, j, tp, prob, eprob; + char *p; + int ret; + + if (!msp->is_ht) { + inode->i_private = &msp->legacy; + ret = minstrel_stats_open(inode, file); + inode->i_private = msp; + return ret; + } + + ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + p += sprintf(p, "type rate throughput ewma prob this prob " + "this succ/attempt success attempts\n"); + for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) { + char htmode = '2'; + char gimode = 'L'; + + if (!mi->groups[i].supported) + continue; + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); + + *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; + p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) * + MCS_GROUP_RATES + j); + + tp = mr->cur_tp / 10; + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + } + p += sprintf(p, "\nTotal packet count:: ideal %d " + "lookaround %d\n", + max(0, (int) mi->total_packets - (int) mi->sample_packets), + mi->sample_packets); + p += sprintf(p, "Average A-MPDU length: %d.%d\n", + MINSTREL_TRUNC(mi->avg_ampdu_len), + MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); + ms->len = p - ms->buf; + + return 0; +} + +static const struct file_operations minstrel_ht_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_ht_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, +}; + +void +minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp, + &minstrel_ht_stat_fops); +} + +void +minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + debugfs_remove(msp->dbg_stats); +} -- cgit v1.1 From aed8e1f9910b216ab3e14cb286c431c870f9b78f Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Mon, 17 May 2010 17:30:59 +0200 Subject: cfg80211: don't refuse HT20 channels on devices that don't support HT40 Don't refuse HT20 channels on devices that don't support HT40. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- net/wireless/chan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/chan.c b/net/wireless/chan.c index b01a6f6..d0c92dd 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -35,8 +35,9 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev, if (!ht_cap->ht_supported) return NULL; - if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || - ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) + if (channel_type != NL80211_CHAN_HT20 && + (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || + ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT)) return NULL; } -- cgit v1.1 From 77c2061d10a408d0220c2b0e7faefe52d9c41008 Mon Sep 17 00:00:00 2001 From: Walter Goldens Date: Tue, 18 May 2010 04:44:54 -0700 Subject: wireless: fix several minor description typos Signed-off-by: Walter Goldens Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- net/mac80211/status.c | 2 +- net/mac80211/work.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0839c4e..31e3386 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1763,7 +1763,7 @@ static void ieee80211_sta_work(struct work_struct *work) /* * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. + * here we'll pick the rest. */ if (WARN(local->suspended, "STA MLME work scheduled while " "going to suspend\n")) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 94613af0..34da679 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -47,7 +47,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, /* * This skb 'survived' a round-trip through the driver, and * hopefully the driver didn't mangle it too badly. However, - * we can definitely not rely on the the control information + * we can definitely not rely on the control information * being correct. Clear it so we don't get junk there, and * indicate that it needs new processing, but must not be * modified/encrypted again. diff --git a/net/mac80211/work.c b/net/mac80211/work.c index be3d4a69..4157717 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -840,7 +840,7 @@ static void ieee80211_work_work(struct work_struct *work) /* * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. + * here we'll pick the rest. */ if (WARN(local->suspended, "work scheduled while going to suspend\n")) return; -- cgit v1.1 From 252aa631f88080920a7083ac5a5844ffc5463629 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 19 May 2010 12:17:12 +0200 Subject: cfg80211: make action channel type optional When sending action frames, we want to verify that we do that on the correct channel. However, checking the channel type in addition can get in the way, since the channel type could change on the fly during an association, and it's not useful to have the channel type anyway since it has no effect on the transmission. Therefore, make it optional to specify so that if wanted, it can still be checked, but is not required. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 4 +++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 9 ++++++--- net/wireless/core.h | 1 + net/wireless/mlme.c | 3 ++- net/wireless/nl80211.c | 3 +++ 6 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c7000a6..f8c49c5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1554,10 +1554,12 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan, - channel_type, buf, len, cookie); + channel_type, channel_type_valid, + buf, len, cookie); } struct cfg80211_ops mac80211_config_ops = { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1a9e2da..d4677ef 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -988,6 +988,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 31e3386..29c3a75 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2308,6 +2308,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_local *local = sdata->local; @@ -2315,9 +2316,11 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb; /* Check that we are on the requested channel for transmission */ - if ((chan != local->tmp_channel || - channel_type != local->tmp_channel_type) && - (chan != local->oper_channel || + if (chan != local->tmp_channel && + chan != local->oper_channel) + return -EBUSY; + if (channel_type_valid && + (channel_type != local->tmp_channel_type && channel_type != local->_oper_channel_type)) return -EBUSY; diff --git a/net/wireless/core.h b/net/wireless/core.h index ae930ac..63d57ae 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -339,6 +339,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 48ead6f..f69ae19 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -827,6 +827,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -855,7 +856,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, /* Transmit the Action frame as requested by user space */ return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - buf, len, cookie); + channel_type_valid, buf, len, cookie); } bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index db71150..90ab3c8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4681,6 +4681,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct ieee80211_channel *chan; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + bool channel_type_valid = false; u32 freq; int err; void *hdr; @@ -4722,6 +4723,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out; } + channel_type_valid = true; } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -4745,6 +4747,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) goto free_msg; } err = cfg80211_mlme_action(rdev, dev, chan, channel_type, + channel_type_valid, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); -- cgit v1.1 From b5f7e7554753e2cc3ef3bef0271fdb32027df2ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 12:05:27 +0000 Subject: ipv4: add LINUX_MIB_IPRPFILTER snmp counter Christoph Lameter mentioned that packets could be dropped in input path because of rp_filter settings, without any SNMP counter being incremented. System administrator can have a hard time to track the problem. This patch introduces a new counter, LINUX_MIB_IPRPFILTER, incremented each time we drop a packet because Reverse Path Filter triggers. (We receive an IPv4 datagram on a given interface, and find the route to send an answer would use another interface) netstat -s | grep IPReversePathFilter IPReversePathFilter: 21714 Reported-by: Christoph Lameter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 6 ++++-- net/ipv4/ip_input.c | 3 +++ net/ipv4/proc.c | 1 + net/ipv4/route.c | 31 ++++++++++++++++++------------- 4 files changed, 26 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4f0ed45..e830f7a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -284,7 +284,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (no_addr) goto last_resort; if (rpf == 1) - goto e_inval; + goto e_rpf; fl.oif = dev->ifindex; ret = 0; @@ -299,7 +299,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, last_resort: if (rpf) - goto e_inval; + goto e_rpf; *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); *itag = 0; return 0; @@ -308,6 +308,8 @@ e_inval_res: fib_res_put(&res); e_inval: return -EINVAL; +e_rpf: + return -EXDEV; } static inline __be32 sk_extract_addr(struct sockaddr *addr) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d930dc5..d52c9da 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -340,6 +340,9 @@ static int ip_rcv_finish(struct sk_buff *skb) else if (err == -ENETUNREACH) IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_INNOROUTES); + else if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); goto drop; } } diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3dc9914..e320ca6 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), + SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8495bce..d377b45 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1851,6 +1851,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, __be32 spec_dst; struct in_device *in_dev = in_dev_get(dev); u32 itag = 0; + int err; /* Primary sanity checks. */ @@ -1865,10 +1866,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!ipv4_is_local_multicast(daddr)) goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); - } else if (fib_validate_source(saddr, 0, tos, 0, - dev, &spec_dst, &itag, 0) < 0) - goto e_inval; - + } else { + err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, + &itag, 0); + if (err < 0) + goto e_err; + } rth = dst_alloc(&ipv4_dst_ops); if (!rth) goto e_nobufs; @@ -1920,8 +1923,10 @@ e_nobufs: return -ENOBUFS; e_inval: + err = -EINVAL; +e_err: in_dev_put(in_dev); - return -EINVAL; + return err; } @@ -1985,7 +1990,6 @@ static int __mkroute_input(struct sk_buff *skb, ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); - err = -EINVAL; goto cleanup; } @@ -2157,13 +2161,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; if (res.type == RTN_LOCAL) { - int result; - result = fib_validate_source(saddr, daddr, tos, + err = fib_validate_source(saddr, daddr, tos, net->loopback_dev->ifindex, dev, &spec_dst, &itag, skb->mark); - if (result < 0) - goto martian_source; - if (result) + if (err < 0) + goto martian_source_keep_err; + if (err) flags |= RTCF_DIRECTSRC; spec_dst = daddr; goto local_input; @@ -2191,7 +2194,7 @@ brd_input: err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, skb->mark); if (err < 0) - goto martian_source; + goto martian_source_keep_err; if (err) flags |= RTCF_DIRECTSRC; } @@ -2272,8 +2275,10 @@ e_nobufs: goto done; martian_source: + err = -EINVAL; +martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - goto e_inval; + goto done; } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, -- cgit v1.1 From 96d362202bfc0e5da78ee59b1645296fbca515f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Jun 2010 19:21:31 +0000 Subject: ipv4: RCU conversion of ip_route_input_slow/ip_route_input_mc Avoid two atomic ops on struct in_device refcount per incoming packet, if slow path taken, (or route cache disabled) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d377b45..1cfe0d1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1843,13 +1843,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } +/* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { - unsigned hash; + unsigned int hash; struct rtable *rth; __be32 spec_dst; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; int err; @@ -1914,18 +1915,14 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); - in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); e_nobufs: - in_dev_put(in_dev); return -ENOBUFS; - e_inval: - err = -EINVAL; + return -EINVAL; e_err: - in_dev_put(in_dev); return err; } @@ -2101,7 +2098,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, @@ -2179,7 +2176,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); done: - in_dev_put(in_dev); if (free_res) fib_res_put(&res); out: return err; @@ -2288,16 +2284,18 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned hash; int iif = dev->ifindex; struct net *net; + int res; net = dev_net(dev); + rcu_read_lock(); + if (!rt_caching(net)) goto skip_cache; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); - rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->u.dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | @@ -2321,7 +2319,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, } RT_CACHE_STAT_INC(in_hlist_search); } - rcu_read_unlock(); skip_cache: /* Multicast recognition logic is moved from route cache to here. @@ -2336,12 +2333,11 @@ skip_cache: route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { - struct in_device *in_dev; + struct in_device *in_dev = __in_dev_get_rcu(dev); - rcu_read_lock(); - if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { + if (in_dev) { int our = ip_check_mc(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE || @@ -2349,15 +2345,18 @@ skip_cache: IN_DEV_MFORWARD(in_dev)) #endif ) { + int res = ip_route_input_mc(skb, daddr, saddr, + tos, dev, our); rcu_read_unlock(); - return ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); + return res; } } rcu_read_unlock(); return -EINVAL; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev); + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(ip_route_input_common); -- cgit v1.1 From bc10502dba37d3b210efd9f3867212298f13b78e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 03:21:52 -0700 Subject: net: use __packed annotation cleanup patch. Use new __packed annotation in net/ and include/ (except netfilter) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bluetooth/bnep/bnep.h | 8 ++++---- net/compat.c | 6 +++--- net/iucv/iucv.c | 14 +++++++------- net/mac80211/cfg.c | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/rx.c | 2 +- net/sctp/sm_make_chunk.c | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 0d9e506..70672544 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -86,26 +86,26 @@ struct bnep_setup_conn_req { __u8 ctrl; __u8 uuid_size; __u8 service[0]; -} __attribute__((packed)); +} __packed; struct bnep_set_filter_req { __u8 type; __u8 ctrl; __be16 len; __u8 list[0]; -} __attribute__((packed)); +} __packed; struct bnep_control_rsp { __u8 type; __u8 ctrl; __be16 resp; -} __attribute__((packed)); +} __packed; struct bnep_ext_hdr { __u8 type; __u8 len; __u8 data[0]; -} __attribute__((packed)); +} __packed; /* BNEP ioctl defines */ #define BNEPCONNADD _IOW('B', 200, int) diff --git a/net/compat.c b/net/compat.c index ec24d9e..1cf7590 100644 --- a/net/compat.c +++ b/net/compat.c @@ -531,7 +531,7 @@ struct compat_group_req { __u32 gr_interface; struct __kernel_sockaddr_storage gr_group __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_source_req { __u32 gsr_interface; @@ -539,7 +539,7 @@ struct compat_group_source_req { __attribute__ ((aligned(4))); struct __kernel_sockaddr_storage gsr_source __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_filter { __u32 gf_interface; @@ -549,7 +549,7 @@ struct compat_group_filter { __u32 gf_numsrc; struct __kernel_sockaddr_storage gf_slist[1] __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ sizeof(struct __kernel_sockaddr_storage)) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index f28ad2cc..499c045 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1463,7 +1463,7 @@ struct iucv_path_pending { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_pending(struct iucv_irq_data *data) { @@ -1524,7 +1524,7 @@ struct iucv_path_complete { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_complete(struct iucv_irq_data *data) { @@ -1554,7 +1554,7 @@ struct iucv_path_severed { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_severed(struct iucv_irq_data *data) { @@ -1590,7 +1590,7 @@ struct iucv_path_quiesced { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_quiesced(struct iucv_irq_data *data) { @@ -1618,7 +1618,7 @@ struct iucv_path_resumed { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_resumed(struct iucv_irq_data *data) { @@ -1649,7 +1649,7 @@ struct iucv_message_complete { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_complete(struct iucv_irq_data *data) { @@ -1694,7 +1694,7 @@ struct iucv_message_pending { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_pending(struct iucv_irq_data *data) { diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c7000a6..a2ed0f7b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -600,7 +600,7 @@ struct iapp_layer2_update { u8 ssap; /* 0 */ u8 control; u8 xid_info[3]; -} __attribute__ ((packed)); +} __packed; static void ieee80211_send_layer2_update(struct sta_info *sta) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1a9e2da..ec3e5c3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1084,7 +1084,7 @@ struct ieee80211_tx_status_rtap_hdr { u8 padding_for_rate; __le16 tx_flags; u8 data_retries; -} __attribute__ ((packed)); +} __packed; /* HT */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bc..2d9a2ee 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2139,7 +2139,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, u8 rate_or_pad; __le16 chan_freq; __le16 chan_flags; - } __attribute__ ((packed)) *rthdr; + } __packed *rthdr; struct sk_buff *skb = rx->skb, *skb2; struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index bd2a50b..246f929 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1817,7 +1817,7 @@ malformed: struct __sctp_missing { __be32 num_missing; __be16 type; -} __attribute__((packed)); +} __packed; /* * Report a missing mandatory parameter. -- cgit v1.1 From db2c24175d149b55784f7cb2c303622ce962c1ae Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 2 Jun 2010 04:55:02 +0000 Subject: act_pedit: access skb->data safely access skb->data safely we should use skb_header_pointer() and skb_store_bits() to access skb->data to handle small or non-linear skbs. Signed-off-by: Changli Gao ---- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fdbd0b7..50e3d94 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -125,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, { struct tcf_pedit *p = a->priv; int i, munged = 0; - u8 *pptr; + unsigned int off; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ @@ -134,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb_network_header(skb); + off = skb_network_offset(skb); spin_lock(&p->tcf_lock); @@ -144,17 +144,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tc_pedit_key *tkey = p->tcfp_keys; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { - u32 *ptr; + u32 *ptr, _data; int offset = tkey->off; if (tkey->offmask) { - if (skb->len > tkey->at) { - char *j = pptr + tkey->at; - offset += ((*j & tkey->offmask) >> - tkey->shift); - } else { + char *d, _d; + + d = skb_header_pointer(skb, off + tkey->at, 1, + &_d); + if (!d) goto bad; - } + offset += (*d & tkey->offmask) >> tkey->shift; } if (offset % 4) { @@ -169,9 +169,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, goto bad; } - ptr = (u32 *)(pptr+offset); + ptr = skb_header_pointer(skb, off + offset, 4, &_data); + if (!ptr) + goto bad; /* just do it, baby */ *ptr = ((*ptr & tkey->mask) ^ tkey->val); + if (ptr == &_data) + skb_store_bits(skb, off + offset, ptr, 4); munged++; } -- cgit v1.1 From 3a4d4aa2d38e5305b5e93dffdc9dd2f975129328 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 May 2010 16:41:40 +0200 Subject: mac80211: remove bogus mod_timer() call The IBSS code has a bogus mod_timer(..., 0) call, we shouldn't ever pass a constant value to the function since any constant value could be in the future or the past. However, invoking the timer here is not necessary at all, since we just finished scanning and just need to have the IBSS code run again from the workqueue later, so factor out the work starting and use that instead. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b2cc1fd..d7a96ce 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -798,6 +798,15 @@ static void ieee80211_ibss_work(struct work_struct *work) } } +static void ieee80211_queue_ibss_work(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + + set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); + ieee80211_queue_work(&local->hw, &ifibss->work); +} + static void ieee80211_ibss_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = @@ -810,8 +819,7 @@ static void ieee80211_ibss_timer(unsigned long data) return; } - set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); - ieee80211_queue_work(&local->hw, &ifibss->work); + ieee80211_queue_ibss_work(sdata); } #ifdef CONFIG_PM @@ -859,7 +867,7 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) if (!sdata->u.ibss.ssid_len) continue; sdata->u.ibss.last_scan_completed = jiffies; - mod_timer(&sdata->u.ibss.timer, 0); + ieee80211_queue_ibss_work(sdata); } mutex_unlock(&local->iflist_mtx); } -- cgit v1.1 From 2b2c009ecf71f4c66ff8420b63dddbc9737e04e3 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Thu, 27 May 2010 15:32:13 +0300 Subject: mac80211: Add support for hardware ARP query filtering Some hardware allow extended filtering of ARP frames not intended for the host. To perform such filtering, the hardware needs to know the current IP address(es) of the host, bound to its interface. Add support for ARP filtering to mac80211 by adding a new op to the driver interface, allowing to configure the current IP addresses. This op is called upon association with the currently configured address(es), and when associated whenever the IP address(es) change. This patch adds configuration of IPv4 addresses only, as IPv6 addresses don't need ARP filtering. Signed-off-by: Juuso Oikarinen Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 17 ++++++++++++ net/mac80211/driver-trace.h | 25 ++++++++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/main.c | 63 ++++++++++++++++++++++++++++++++++++++++++++- net/mac80211/mlme.c | 11 +++++++- 5 files changed, 116 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4f22713..978850e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -83,6 +83,23 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, trace_drv_bss_info_changed(local, sdata, info, changed); } +struct in_ifaddr; +static inline int drv_configure_arp_filter(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct in_ifaddr *ifa_list) +{ + int ret = 0; + + might_sleep(); + + if (local->ops->configure_arp_filter) + ret = local->ops->configure_arp_filter(&local->hw, vif, + ifa_list); + + trace_drv_configure_arp_filter(local, vif_to_sdata(vif), ifa_list, ret); + return ret; +} + static inline u64 drv_prepare_multicast(struct ieee80211_local *local, struct netdev_hw_addr_list *mc_list) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6a9b234..577460d 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -219,6 +219,31 @@ TRACE_EVENT(drv_bss_info_changed, ) ); +TRACE_EVENT(drv_configure_arp_filter, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct in_ifaddr *ifa_list, int ret), + + TP_ARGS(local, sdata, ifa_list, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->ret = ret; + ), + + TP_printk( + VIF_PR_FMT LOCAL_PR_FMT " ret:%d", + VIF_PR_ARG, LOCAL_PR_ARG, __entry->ret + ) +); + TRACE_EVENT(drv_prepare_multicast, TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret), diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d4677ef..47d6753 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -851,6 +851,7 @@ struct ieee80211_local { struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; + struct notifier_block ifa_notifier; int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ @@ -997,6 +998,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); +int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, struct ieee80211_bss *bss, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c8548e61..4051b23 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -329,6 +329,58 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) mutex_unlock(&local->iflist_mtx); } +int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata) +{ + struct in_device *idev; + int ret = 0; + + BUG_ON(!sdata); + ASSERT_RTNL(); + + idev = sdata->dev->ip_ptr; + if (!idev) + return 0; + + ret = drv_configure_arp_filter(sdata->local, &sdata->vif, + idev->ifa_list); + return ret; +} + +static int ieee80211_ifa_changed(struct notifier_block *nb, + unsigned long data, void *arg) +{ + struct in_ifaddr *ifa = arg; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, + ifa_notifier); + struct net_device *ndev = ifa->ifa_dev->dev; + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + + /* Make sure it's our interface that got changed */ + if (!wdev) + return NOTIFY_DONE; + + if (wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; + + /* We are concerned about IP addresses only when associated */ + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + + /* ARP filtering is only supported in managed mode */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; + + ifmgd = &sdata->u.mgd; + mutex_lock(&ifmgd->mtx); + if (ifmgd->associated) + ieee80211_set_arp_filter(sdata); + mutex_unlock(&ifmgd->mtx); + + return NOTIFY_DONE; +} + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -612,14 +664,22 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - if (result) { rtnl_lock(); goto fail_pm_qos; } + local->ifa_notifier.notifier_call = ieee80211_ifa_changed; + result = register_inetaddr_notifier(&local->ifa_notifier); + if (result) + goto fail_ifa; + return 0; + fail_ifa: + pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, + &local->network_latency_notifier); + rtnl_lock(); fail_pm_qos: ieee80211_led_exit(local); ieee80211_remove_interfaces(local); @@ -647,6 +707,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); + unregister_inetaddr_notifier(&local->ifa_notifier); rtnl_lock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 29c3a75..7e72013 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2078,8 +2078,17 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; + } else { + mutex_unlock(&wk->sdata->u.mgd.mtx); + + /* + * configure ARP filter IP addresses to the driver, + * intentionally outside the mgd mutex. + */ + rtnl_lock(); + ieee80211_set_arp_filter(wk->sdata); + rtnl_unlock(); } - mutex_unlock(&wk->sdata->u.mgd.mtx); } cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len); -- cgit v1.1 From 6a8579d0e62c0eac428184ce45e86bc46677724a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 May 2010 14:41:07 +0200 Subject: mac80211: clean up ieee80211_stop_tx_ba_session There's no sense in letting anything but internal mac80211 functions set the initiator to anything but WLAN_BACK_INITIATOR, since WLAN_BACK_RECIPIENT is only valid when we have received a frame from the peer, which we react to directly in mac80211. The debugfs code I recently added got this wrong as well. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 7 +++---- net/mac80211/debugfs_sta.c | 3 +-- net/mac80211/driver-trace.h | 10 ++++------ 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c163d0a..feb15c4 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -538,14 +538,13 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return ret; } -int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, - enum ieee80211_back_parties initiator) +int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - trace_api_stop_tx_ba_session(pubsta, tid, initiator); + trace_api_stop_tx_ba_session(pubsta, tid); if (!local->ops->ampdu_action) return -EINVAL; @@ -553,7 +552,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (tid >= STA_TID_NUM) return -EINVAL; - return __ieee80211_stop_tx_ba_session(sta, tid, initiator); + return __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index e763f15..9f14061 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -210,8 +210,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (start) ret = ieee80211_start_tx_ba_session(&sta->sta, tid); else - ret = ieee80211_stop_tx_ba_session(&sta->sta, tid, - WLAN_BACK_RECIPIENT); + ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); ret = 0; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 577460d..6b906301 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -876,25 +876,23 @@ TRACE_EVENT(api_start_tx_ba_cb, ); TRACE_EVENT(api_stop_tx_ba_session, - TP_PROTO(struct ieee80211_sta *sta, u16 tid, u16 initiator), + TP_PROTO(struct ieee80211_sta *sta, u16 tid), - TP_ARGS(sta, tid, initiator), + TP_ARGS(sta, tid), TP_STRUCT__entry( STA_ENTRY __field(u16, tid) - __field(u16, initiator) ), TP_fast_assign( STA_ASSIGN; __entry->tid = tid; - __entry->initiator = initiator; ), TP_printk( - STA_PR_FMT " tid:%d initiator:%d", - STA_PR_ARG, __entry->tid, __entry->initiator + STA_PR_FMT " tid:%d", + STA_PR_ARG, __entry->tid ) ); -- cgit v1.1 From 761ab470364b550c9b1a5e1e31be51d415aaf42b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 14:24:19 +0200 Subject: mac80211: move WEP weak IV check I suspect the compiler will do this optimisation anyway, but it seems cleaner to move this into the WEP switch case. Also make rx_h_decrypt use a local variable for the frame_control so that we don't need to reload the hdr variable for this after linearizing. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6e2a7bc..0ade267 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -825,6 +825,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) ieee80211_rx_result result = RX_DROP_UNUSABLE; struct ieee80211_key *stakey = NULL; int mmie_keyidx = -1; + __le16 fc; /* * Key selection 101 @@ -866,13 +867,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (rx->sta) stakey = rcu_dereference(rx->sta->key); - if (!ieee80211_has_protected(hdr->frame_control)) + fc = hdr->frame_control; + + if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); if (!is_multicast_ether_addr(hdr->addr1) && stakey) { rx->key = stakey; /* Skip decryption if the frame is not protected. */ - if (!ieee80211_has_protected(hdr->frame_control)) + if (!ieee80211_has_protected(fc)) return RX_CONTINUE; } else if (mmie_keyidx >= 0) { /* Broadcast/multicast robust management frame / BIP */ @@ -884,7 +887,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) return RX_DROP_MONITOR; /* unexpected BIP keyidx */ rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); - } else if (!ieee80211_has_protected(hdr->frame_control)) { + } else if (!ieee80211_has_protected(fc)) { /* * The frame was not protected, so skip decryption. However, we * need to set rx->key if there is a key that could have been @@ -892,7 +895,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * have been expected. */ struct ieee80211_key *key = NULL; - if (ieee80211_is_mgmt(hdr->frame_control) && + if (ieee80211_is_mgmt(fc) && is_multicast_ether_addr(hdr->addr1) && (key = rcu_dereference(rx->sdata->default_mgmt_key))) rx->key = key; @@ -914,7 +917,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - hdrlen = ieee80211_hdrlen(hdr->frame_control); + hdrlen = ieee80211_hdrlen(fc); if (rx->skb->len < 8 + hdrlen) return RX_DROP_UNUSABLE; /* TODO: count this? */ @@ -947,19 +950,17 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; - - hdr = (struct ieee80211_hdr *)rx->skb->data; - - /* Check for weak IVs if possible */ - if (rx->sta && rx->key->conf.alg == ALG_WEP && - ieee80211_is_data(hdr->frame_control) && - (!(status->flag & RX_FLAG_IV_STRIPPED) || - !(status->flag & RX_FLAG_DECRYPTED)) && - ieee80211_wep_is_weak_iv(rx->skb, rx->key)) - rx->sta->wep_weak_iv_count++; + /* the hdr variable is invalid now! */ switch (rx->key->conf.alg) { case ALG_WEP: + /* Check for weak IVs if possible */ + if (rx->sta && ieee80211_is_data(fc) && + (!(status->flag & RX_FLAG_IV_STRIPPED) || + !(status->flag & RX_FLAG_DECRYPTED)) && + ieee80211_wep_is_weak_iv(rx->skb, rx->key)) + rx->sta->wep_weak_iv_count++; + result = ieee80211_crypto_wep_decrypt(rx); break; case ALG_TKIP: -- cgit v1.1 From e0961f112cd88176acc6d1af6ca6352f85cdf993 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 14:48:52 +0200 Subject: mac80211: remove useless work starting Ever since we use only cfg80211 for configuration, there is no configuration that could be pending at this point, cfg80211 will have the configuration that is pending and apply it afterwards. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/iface.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 50deb01..3d3a094 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -321,15 +321,6 @@ static int ieee80211_open(struct net_device *dev) ieee80211_recalc_ps(local, -1); - /* - * ieee80211_sta_work is disabled while network interface - * is down. Therefore, some configuration changes may not - * yet be effective. Trigger execution of ieee80211_sta_work - * to fix this. - */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - netif_tx_start_all_queues(dev); return 0; -- cgit v1.1 From 08daecaead42b85b69b33d7d8429a93dfbf75b58 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 30 May 2010 14:53:43 +0200 Subject: mac80211: drop control frames after processing After ieee80211_rx_h_ctrl() processing we only want to process management (including action) frames, so there's no point in letting control frames continue. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0ade267..0b9898a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1844,7 +1844,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) return RX_QUEUED; } - return RX_CONTINUE; + /* + * After this point, we only want management frames, + * so we can drop all remaining control frames to + * cooked monitor interfaces. + */ + return RX_DROP_MONITOR; } static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, -- cgit v1.1 From fcea60070fe8fa48df579f155ec7bc20a868f7dc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 31 May 2010 11:40:23 +0200 Subject: mac80211: move plink state For some odd reason, the plink_state enum is declared in the middle between aggregation related structures. Move it down to make the file easier to read. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index df9d455..813da34 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -120,6 +120,28 @@ struct tid_ampdu_rx { }; /** + * struct sta_ampdu_mlme - STA aggregation information. + * + * @tid_active_rx: TID's state in Rx session state machine. + * @tid_rx: aggregation info for Rx per TID + * @tid_state_tx: TID's state in Tx session state machine. + * @tid_tx: aggregation info for Tx per TID + * @addba_req_num: number of times addBA request has been sent. + * @dialog_token_allocator: dialog token enumerator for each new session; + */ +struct sta_ampdu_mlme { + /* rx */ + bool tid_active_rx[STA_TID_NUM]; + struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; + /* tx */ + u8 tid_state_tx[STA_TID_NUM]; + struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; + u8 addba_req_num[STA_TID_NUM]; + u8 dialog_token_allocator; +}; + + +/** * enum plink_state - state of a mesh peer link finite state machine * * @PLINK_LISTEN: initial state, considered the implicit state of non existant @@ -143,28 +165,6 @@ enum plink_state { }; /** - * struct sta_ampdu_mlme - STA aggregation information. - * - * @tid_active_rx: TID's state in Rx session state machine. - * @tid_rx: aggregation info for Rx per TID - * @tid_state_tx: TID's state in Tx session state machine. - * @tid_tx: aggregation info for Tx per TID - * @addba_req_num: number of times addBA request has been sent. - * @dialog_token_allocator: dialog token enumerator for each new session; - */ -struct sta_ampdu_mlme { - /* rx */ - bool tid_active_rx[STA_TID_NUM]; - struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; - /* tx */ - u8 tid_state_tx[STA_TID_NUM]; - struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; - u8 addba_req_num[STA_TID_NUM]; - u8 dialog_token_allocator; -}; - - -/** * struct sta_info - STA information * * This structure collects information about a station that -- cgit v1.1 From ad0e2b5a00dbec303e4682b403bb6703d11dcdb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Jun 2010 10:19:19 +0200 Subject: mac80211: simplify key locking Since I recently made station management able to sleep, I can now rework key management as well; since it will no longer need a spinlock and can also use a mutex instead, a bunch of code to allow drivers' set_key to sleep while key management is protected by a spinlock can now be removed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 18 ++- net/mac80211/ieee80211_i.h | 4 +- net/mac80211/iface.c | 5 +- net/mac80211/key.c | 288 +++++++++++---------------------------------- net/mac80211/key.h | 22 +--- net/mac80211/main.c | 2 +- net/mac80211/sta_info.c | 8 -- 7 files changed, 80 insertions(+), 267 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f8c49c5..952845e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -120,6 +120,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_key *key; int err; + if (!netif_running(dev)) + return -ENETDOWN; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (params->cipher) { @@ -145,7 +148,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (!key) return -ENOMEM; - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); @@ -160,7 +163,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, err = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return err; } @@ -174,7 +177,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { ret = -ENOENT; @@ -202,7 +205,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, ret = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return ret; } @@ -305,15 +308,10 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_set_default_key(sdata, key_idx); - rcu_read_unlock(); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 47d6753..4d3883e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -746,10 +746,10 @@ struct ieee80211_local { struct mutex iflist_mtx; /* - * Key lock, protects sdata's key_list and sta_info's + * Key mutex, protects sdata's key_list and sta_info's * key pointers (write access, they're RCU.) */ - spinlock_t key_lock; + struct mutex key_mtx; /* Scanning and BSS list */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3d3a094..1afa9ec 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -268,7 +268,6 @@ static int ieee80211_open(struct net_device *dev) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_enable_keys(sdata); if (sdata->vif.type == NL80211_IFTYPE_STATION) netif_carrier_off(dev); @@ -522,8 +521,8 @@ static int ieee80211_stop(struct net_device *dev) BSS_CHANGED_BEACON_ENABLED); } - /* disable all keys for as long as this netdev is down */ - ieee80211_disable_keys(sdata); + /* free all remaining keys, there shouldn't be any */ + ieee80211_free_keys(sdata); drv_remove_interface(local, &sdata->vif); } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e8f6e3b..d0d9001 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -36,80 +36,20 @@ * There is currently no way of knowing this except by looking into * debugfs. * - * All key operations are protected internally so you can call them at - * any time. + * All key operations are protected internally. * * Within mac80211, key references are, just as STA structure references, * protected by RCU. Note, however, that some things are unprotected, * namely the key->sta dereferences within the hardware acceleration - * functions. This means that sta_info_destroy() must flush the key todo - * list. - * - * All the direct key list manipulation functions must not sleep because - * they can operate on STA info structs that are protected by RCU. + * functions. This means that sta_info_destroy() must remove the key + * which waits for an RCU grace period. */ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -/* key mutex: used to synchronise todo runners */ -static DEFINE_MUTEX(key_mutex); -static DEFINE_SPINLOCK(todo_lock); -static LIST_HEAD(todo_list); - -static void key_todo(struct work_struct *work) +static void assert_key_lock(struct ieee80211_local *local) { - ieee80211_key_todo(); -} - -static DECLARE_WORK(todo_work, key_todo); - -/** - * add_todo - add todo item for a key - * - * @key: key to add to do item for - * @flag: todo flag(s) - * - * Must be called with IRQs or softirqs disabled. - */ -static void add_todo(struct ieee80211_key *key, u32 flag) -{ - if (!key) - return; - - spin_lock(&todo_lock); - key->flags |= flag; - /* - * Remove again if already on the list so that we move it to the end. - */ - if (!list_empty(&key->todo)) - list_del(&key->todo); - list_add_tail(&key->todo, &todo_list); - schedule_work(&todo_work); - spin_unlock(&todo_lock); -} - -/** - * ieee80211_key_lock - lock the mac80211 key operation lock - * - * This locks the (global) mac80211 key operation lock, all - * key operations must be done under this lock. - */ -static void ieee80211_key_lock(void) -{ - mutex_lock(&key_mutex); -} - -/** - * ieee80211_key_unlock - unlock the mac80211 key operation lock - */ -static void ieee80211_key_unlock(void) -{ - mutex_unlock(&key_mutex); -} - -static void assert_key_lock(void) -{ - WARN_ON(!mutex_is_locked(&key_mutex)); + WARN_ON(!mutex_is_locked(&local->key_mtx)); } static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) @@ -126,12 +66,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key->local->ops->set_key) return; + assert_key_lock(key->local); + sta = get_sta_for_key(key); sdata = key->sdata; @@ -142,11 +83,8 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); - if (!ret) { - spin_lock_bh(&todo_lock); + if (!ret) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); - } if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) printk(KERN_ERR "mac80211-%s: failed to set key " @@ -161,18 +99,15 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key || !key->local->ops->set_key) return; - spin_lock_bh(&todo_lock); - if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { - spin_unlock_bh(&todo_lock); + assert_key_lock(key->local); + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; - } - spin_unlock_bh(&todo_lock); sta = get_sta_for_key(key); sdata = key->sdata; @@ -191,9 +126,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) wiphy_name(key->local->hw.wiphy), key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); - spin_lock_bh(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); } static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, @@ -201,22 +134,24 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFKEY); + if (key) { + ieee80211_debugfs_key_remove_default(key->sdata); + ieee80211_debugfs_key_add_default(key->sdata); + } } void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_key(sdata, idx); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void @@ -224,24 +159,26 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_mgmt_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY); + if (key) { + ieee80211_debugfs_key_remove_mgmt_default(key->sdata); + ieee80211_debugfs_key_add_mgmt_default(key->sdata); + } } void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_mgmt_key(sdata, idx); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } @@ -352,7 +289,6 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - INIT_LIST_HEAD(&key->todo); if (alg == ALG_CCMP) { /* @@ -382,12 +318,27 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, return key; } +static void __ieee80211_key_destroy(struct ieee80211_key *key) +{ + if (!key) + return; + + ieee80211_key_disable_hw_accel(key); + + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + if (key->conf.alg == ALG_AES_CMAC) + ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + ieee80211_debugfs_key_remove(key); + + kfree(key); +} + void ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_key *old_key; - unsigned long flags; int idx; BUG_ON(!sdata); @@ -431,7 +382,7 @@ void ieee80211_key_link(struct ieee80211_key *key, } } - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); if (sta) old_key = sta->key; @@ -439,15 +390,13 @@ void ieee80211_key_link(struct ieee80211_key *key, old_key = sdata->keys[idx]; __ieee80211_key_replace(sdata, sta, old_key, key); + __ieee80211_key_destroy(old_key); - /* free old key later */ - add_todo(old_key, KEY_FLAG_TODO_DELETE); + ieee80211_debugfs_key_add(key); - add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); - if (ieee80211_sdata_running(sdata)) - add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD); + ieee80211_key_enable_hw_accel(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void __ieee80211_key_free(struct ieee80211_key *key) @@ -458,170 +407,65 @@ static void __ieee80211_key_free(struct ieee80211_key *key) if (key->sdata) __ieee80211_key_replace(key->sdata, key->sta, key, NULL); - - add_todo(key, KEY_FLAG_TODO_DELETE); + __ieee80211_key_destroy(key); } void ieee80211_key_free(struct ieee80211_key *key) { - unsigned long flags; + struct ieee80211_local *local; if (!key) return; - if (!key->sdata) { - /* The key has not been linked yet, simply free it - * and don't Oops */ - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - kfree(key); - return; - } + local = key->sdata->local; - spin_lock_irqsave(&key->sdata->local->key_lock, flags); + mutex_lock(&local->key_mtx); __ieee80211_key_free(key); - spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); + mutex_unlock(&local->key_mtx); } -/* - * To be safe against concurrent manipulations of the list (which shouldn't - * actually happen) we need to hold the spinlock. But under the spinlock we - * can't actually do much, so we defer processing to the todo list. Then run - * the todo list to be sure the operation and possibly previously pending - * operations are completed. - */ -static void ieee80211_todo_for_each_key(struct ieee80211_sub_if_data *sdata, - u32 todo_flags) +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - unsigned long flags; - - might_sleep(); - - spin_lock_irqsave(&sdata->local->key_lock, flags); - list_for_each_entry(key, &sdata->key_list, list) - add_todo(key, todo_flags); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - - ieee80211_key_todo(); -} -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) -{ ASSERT_RTNL(); if (WARN_ON(!ieee80211_sdata_running(sdata))) return; - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_ADD); -} - -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) -{ - ASSERT_RTNL(); - - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_REMOVE); -} - -static void __ieee80211_key_destroy(struct ieee80211_key *key) -{ - if (!key) - return; - - ieee80211_key_disable_hw_accel(key); + mutex_lock(&sdata->local->key_mtx); - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) - ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); - ieee80211_debugfs_key_remove(key); + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_enable_hw_accel(key); - kfree(key); + mutex_unlock(&sdata->local->key_mtx); } -static void __ieee80211_key_todo(void) +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - bool work_done; - u32 todoflags; - /* - * NB: sta_info_destroy relies on this! - */ - synchronize_rcu(); - - spin_lock_bh(&todo_lock); - while (!list_empty(&todo_list)) { - key = list_first_entry(&todo_list, struct ieee80211_key, todo); - list_del_init(&key->todo); - todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS | - KEY_FLAG_TODO_DEFKEY | - KEY_FLAG_TODO_DEFMGMTKEY | - KEY_FLAG_TODO_HWACCEL_ADD | - KEY_FLAG_TODO_HWACCEL_REMOVE | - KEY_FLAG_TODO_DELETE); - key->flags &= ~todoflags; - spin_unlock_bh(&todo_lock); - - work_done = false; - - if (todoflags & KEY_FLAG_TODO_ADD_DEBUGFS) { - ieee80211_debugfs_key_add(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFKEY) { - ieee80211_debugfs_key_remove_default(key->sdata); - ieee80211_debugfs_key_add_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) { - ieee80211_debugfs_key_remove_mgmt_default(key->sdata); - ieee80211_debugfs_key_add_mgmt_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) { - ieee80211_key_enable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_REMOVE) { - ieee80211_key_disable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DELETE) { - __ieee80211_key_destroy(key); - work_done = true; - } + ASSERT_RTNL(); - WARN_ON(!work_done); + mutex_lock(&sdata->local->key_mtx); - spin_lock_bh(&todo_lock); - } - spin_unlock_bh(&todo_lock); -} + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_disable_hw_accel(key); -void ieee80211_key_todo(void) -{ - ieee80211_key_lock(); - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key, *tmp; - unsigned long flags; - ieee80211_key_lock(); + mutex_lock(&sdata->local->key_mtx); ieee80211_debugfs_key_remove_default(sdata); ieee80211_debugfs_key_remove_mgmt_default(sdata); - spin_lock_irqsave(&sdata->local->key_lock, flags); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) __ieee80211_key_free(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index bdc2968..9996e3b 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -38,25 +38,9 @@ struct sta_info; * * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present * in the hardware for TX crypto hardware acceleration. - * @KEY_FLAG_TODO_DELETE: Key is marked for deletion and will, after an - * RCU grace period, no longer be reachable other than from the - * todo list. - * @KEY_FLAG_TODO_HWACCEL_ADD: Key needs to be added to hardware acceleration. - * @KEY_FLAG_TODO_HWACCEL_REMOVE: Key needs to be removed from hardware - * acceleration. - * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated. - * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs. - * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs - * to be updated. */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), - KEY_FLAG_TODO_DELETE = BIT(1), - KEY_FLAG_TODO_HWACCEL_ADD = BIT(2), - KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3), - KEY_FLAG_TODO_DEFKEY = BIT(4), - KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), - KEY_FLAG_TODO_DEFMGMTKEY = BIT(6), }; enum ieee80211_internal_tkip_state { @@ -79,10 +63,8 @@ struct ieee80211_key { /* for sdata list */ struct list_head list; - /* for todo list */ - struct list_head todo; - /* protected by todo lock! */ + /* protected by key mutex */ unsigned int flags; union { @@ -155,6 +137,4 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_key_todo(void); - #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4051b23..045ead9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -448,7 +448,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->iflist_mtx); mutex_init(&local->scan_mtx); - spin_lock_init(&local->key_lock); + mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 7301975..c426c57 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -648,14 +648,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) if (sta->key) { ieee80211_key_free(sta->key); - /* - * We have only unlinked the key, and actually destroying it - * may mean it is removed from hardware which requires that - * the key->sta pointer is still valid, so flush the key todo - * list here. - */ - ieee80211_key_todo(); - WARN_ON(sta->key); } -- cgit v1.1 From 2826bcd844e05dcbef9b9284bddb7fe88e8d314f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 2 Jun 2010 02:57:34 +0200 Subject: mac80211: reduce debugfs code size This patch reduces the binary size by around 25k (measured on MIPS, with CONFIG_MAC80211_DEBUG_COUNTERS enabled). Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/debugfs.c | 154 +++++++++++++++++---------------------------- net/mac80211/debugfs_sta.c | 45 ++++++------- 2 files changed, 77 insertions(+), 122 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 637929b..a694c59 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -307,9 +307,6 @@ static const struct file_operations queues_ops = { /* statistics stuff */ -#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ - DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value) - static ssize_t format_devstat_counter(struct ieee80211_local *local, char __user *userbuf, size_t count, loff_t *ppos, @@ -351,75 +348,16 @@ static const struct file_operations stats_ ##name## _ops = { \ .open = mac80211_open_file_generic, \ }; -#define DEBUGFS_STATS_ADD(name) \ +#define DEBUGFS_STATS_ADD(name, field) \ + debugfs_create_u32(#name, 0400, statsd, (u32 *) &field); +#define DEBUGFS_DEVSTATS_ADD(name) \ debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); -DEBUGFS_STATS_FILE(transmitted_fragment_count, 20, "%u", - local->dot11TransmittedFragmentCount); -DEBUGFS_STATS_FILE(multicast_transmitted_frame_count, 20, "%u", - local->dot11MulticastTransmittedFrameCount); -DEBUGFS_STATS_FILE(failed_count, 20, "%u", - local->dot11FailedCount); -DEBUGFS_STATS_FILE(retry_count, 20, "%u", - local->dot11RetryCount); -DEBUGFS_STATS_FILE(multiple_retry_count, 20, "%u", - local->dot11MultipleRetryCount); -DEBUGFS_STATS_FILE(frame_duplicate_count, 20, "%u", - local->dot11FrameDuplicateCount); -DEBUGFS_STATS_FILE(received_fragment_count, 20, "%u", - local->dot11ReceivedFragmentCount); -DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u", - local->dot11MulticastReceivedFrameCount); -DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u", - local->dot11TransmittedFrameCount); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS -DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u", - local->tx_handlers_drop); -DEBUGFS_STATS_FILE(tx_handlers_queued, 20, "%u", - local->tx_handlers_queued); -DEBUGFS_STATS_FILE(tx_handlers_drop_unencrypted, 20, "%u", - local->tx_handlers_drop_unencrypted); -DEBUGFS_STATS_FILE(tx_handlers_drop_fragment, 20, "%u", - local->tx_handlers_drop_fragment); -DEBUGFS_STATS_FILE(tx_handlers_drop_wep, 20, "%u", - local->tx_handlers_drop_wep); -DEBUGFS_STATS_FILE(tx_handlers_drop_not_assoc, 20, "%u", - local->tx_handlers_drop_not_assoc); -DEBUGFS_STATS_FILE(tx_handlers_drop_unauth_port, 20, "%u", - local->tx_handlers_drop_unauth_port); -DEBUGFS_STATS_FILE(rx_handlers_drop, 20, "%u", - local->rx_handlers_drop); -DEBUGFS_STATS_FILE(rx_handlers_queued, 20, "%u", - local->rx_handlers_queued); -DEBUGFS_STATS_FILE(rx_handlers_drop_nullfunc, 20, "%u", - local->rx_handlers_drop_nullfunc); -DEBUGFS_STATS_FILE(rx_handlers_drop_defrag, 20, "%u", - local->rx_handlers_drop_defrag); -DEBUGFS_STATS_FILE(rx_handlers_drop_short, 20, "%u", - local->rx_handlers_drop_short); -DEBUGFS_STATS_FILE(rx_handlers_drop_passive_scan, 20, "%u", - local->rx_handlers_drop_passive_scan); -DEBUGFS_STATS_FILE(tx_expand_skb_head, 20, "%u", - local->tx_expand_skb_head); -DEBUGFS_STATS_FILE(tx_expand_skb_head_cloned, 20, "%u", - local->tx_expand_skb_head_cloned); -DEBUGFS_STATS_FILE(rx_expand_skb_head, 20, "%u", - local->rx_expand_skb_head); -DEBUGFS_STATS_FILE(rx_expand_skb_head2, 20, "%u", - local->rx_expand_skb_head2); -DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u", - local->rx_handlers_fragments); -DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u", - local->tx_status_drop); - -#endif - DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount); DEBUGFS_DEVSTATS_FILE(dot11RTSFailureCount); DEBUGFS_DEVSTATS_FILE(dot11FCSErrorCount); DEBUGFS_DEVSTATS_FILE(dot11RTSSuccessCount); - void debugfs_hw_add(struct ieee80211_local *local) { struct dentry *phyd = local->hw.wiphy->debugfsdir; @@ -448,38 +386,60 @@ void debugfs_hw_add(struct ieee80211_local *local) if (!statsd) return; - DEBUGFS_STATS_ADD(transmitted_fragment_count); - DEBUGFS_STATS_ADD(multicast_transmitted_frame_count); - DEBUGFS_STATS_ADD(failed_count); - DEBUGFS_STATS_ADD(retry_count); - DEBUGFS_STATS_ADD(multiple_retry_count); - DEBUGFS_STATS_ADD(frame_duplicate_count); - DEBUGFS_STATS_ADD(received_fragment_count); - DEBUGFS_STATS_ADD(multicast_received_frame_count); - DEBUGFS_STATS_ADD(transmitted_frame_count); + DEBUGFS_STATS_ADD(transmitted_fragment_count, + local->dot11TransmittedFragmentCount); + DEBUGFS_STATS_ADD(multicast_transmitted_frame_count, + local->dot11MulticastTransmittedFrameCount); + DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount); + DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount); + DEBUGFS_STATS_ADD(multiple_retry_count, + local->dot11MultipleRetryCount); + DEBUGFS_STATS_ADD(frame_duplicate_count, + local->dot11FrameDuplicateCount); + DEBUGFS_STATS_ADD(received_fragment_count, + local->dot11ReceivedFragmentCount); + DEBUGFS_STATS_ADD(multicast_received_frame_count, + local->dot11MulticastReceivedFrameCount); + DEBUGFS_STATS_ADD(transmitted_frame_count, + local->dot11TransmittedFrameCount); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_STATS_ADD(tx_handlers_drop); - DEBUGFS_STATS_ADD(tx_handlers_queued); - DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted); - DEBUGFS_STATS_ADD(tx_handlers_drop_fragment); - DEBUGFS_STATS_ADD(tx_handlers_drop_wep); - DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc); - DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port); - DEBUGFS_STATS_ADD(rx_handlers_drop); - DEBUGFS_STATS_ADD(rx_handlers_queued); - DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc); - DEBUGFS_STATS_ADD(rx_handlers_drop_defrag); - DEBUGFS_STATS_ADD(rx_handlers_drop_short); - DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan); - DEBUGFS_STATS_ADD(tx_expand_skb_head); - DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned); - DEBUGFS_STATS_ADD(rx_expand_skb_head); - DEBUGFS_STATS_ADD(rx_expand_skb_head2); - DEBUGFS_STATS_ADD(rx_handlers_fragments); - DEBUGFS_STATS_ADD(tx_status_drop); + DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop); + DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued); + DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted, + local->tx_handlers_drop_unencrypted); + DEBUGFS_STATS_ADD(tx_handlers_drop_fragment, + local->tx_handlers_drop_fragment); + DEBUGFS_STATS_ADD(tx_handlers_drop_wep, + local->tx_handlers_drop_wep); + DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc, + local->tx_handlers_drop_not_assoc); + DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port, + local->tx_handlers_drop_unauth_port); + DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop); + DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued); + DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc, + local->rx_handlers_drop_nullfunc); + DEBUGFS_STATS_ADD(rx_handlers_drop_defrag, + local->rx_handlers_drop_defrag); + DEBUGFS_STATS_ADD(rx_handlers_drop_short, + local->rx_handlers_drop_short); + DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan, + local->rx_handlers_drop_passive_scan); + DEBUGFS_STATS_ADD(tx_expand_skb_head, + local->tx_expand_skb_head); + DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned, + local->tx_expand_skb_head_cloned); + DEBUGFS_STATS_ADD(rx_expand_skb_head, + local->rx_expand_skb_head); + DEBUGFS_STATS_ADD(rx_expand_skb_head2, + local->rx_expand_skb_head2); + DEBUGFS_STATS_ADD(rx_handlers_fragments, + local->rx_handlers_fragments); + DEBUGFS_STATS_ADD(tx_status_drop, + local->tx_status_drop); #endif - DEBUGFS_STATS_ADD(dot11ACKFailureCount); - DEBUGFS_STATS_ADD(dot11RTSFailureCount); - DEBUGFS_STATS_ADD(dot11FCSErrorCount); - DEBUGFS_STATS_ADD(dot11RTSSuccessCount); + DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 9f14061..576e024 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -30,7 +30,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \ } #define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n") #define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n") -#define STA_READ_LU(name, field) STA_READ(name, 20, field, "%lu\n") #define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n") #define STA_OPS(name) \ @@ -52,19 +51,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->name, S); -STA_FILE(rx_packets, rx_packets, LU); -STA_FILE(tx_packets, tx_packets, LU); -STA_FILE(rx_bytes, rx_bytes, LU); -STA_FILE(tx_bytes, tx_bytes, LU); -STA_FILE(rx_duplicates, num_duplicates, LU); -STA_FILE(rx_fragments, rx_fragments, LU); -STA_FILE(rx_dropped, rx_dropped, LU); -STA_FILE(tx_fragments, tx_fragments, LU); -STA_FILE(tx_filtered, tx_filtered_count, LU); -STA_FILE(tx_retry_failed, tx_retry_failed, LU); -STA_FILE(tx_retry_count, tx_retry_count, LU); STA_FILE(last_signal, last_signal, D); -STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -306,6 +293,13 @@ STA_OPS(ht_capa); debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); +#define DEBUGFS_ADD_COUNTER(name, field) \ + if (sizeof(sta->field) == sizeof(u32)) \ + debugfs_create_u32(#name, 0400, sta->debugfs.dir, \ + (u32 *) &sta->field); \ + else \ + debugfs_create_u64(#name, 0400, sta->debugfs.dir, \ + (u64 *) &sta->field); void ieee80211_sta_debugfs_add(struct sta_info *sta) { @@ -338,20 +332,21 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); DEBUGFS_ADD(dev); - DEBUGFS_ADD(rx_packets); - DEBUGFS_ADD(tx_packets); - DEBUGFS_ADD(rx_bytes); - DEBUGFS_ADD(tx_bytes); - DEBUGFS_ADD(rx_duplicates); - DEBUGFS_ADD(rx_fragments); - DEBUGFS_ADD(rx_dropped); - DEBUGFS_ADD(tx_fragments); - DEBUGFS_ADD(tx_filtered); - DEBUGFS_ADD(tx_retry_failed); - DEBUGFS_ADD(tx_retry_count); DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(wep_weak_iv_count); DEBUGFS_ADD(ht_capa); + + DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); + DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); + DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes); + DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes); + DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); + DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped); + DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); + DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed); + DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); + DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count); } void ieee80211_sta_debugfs_remove(struct sta_info *sta) -- cgit v1.1 From 4736022844fe694c4ee971fa2b6c1cb38dadbc78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 04:13:21 +0000 Subject: ipv4: RCU changes in __mkroute_input() Avoid two atomic ops on output device refcount Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1cfe0d1..7b8eacd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1957,22 +1957,22 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +/* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { - struct rtable *rth; int err; struct in_device *out_dev; - unsigned flags = 0; + unsigned int flags = 0; __be32 spec_dst; u32 itag; /* get a working reference to the output device */ - out_dev = in_dev_get(FIB_RES_DEV(*res)); + out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); if (out_dev == NULL) { if (net_ratelimit()) printk(KERN_CRIT "Bug in ip_route_input" \ @@ -2053,8 +2053,6 @@ static int __mkroute_input(struct sk_buff *skb, *result = rth; err = 0; cleanup: - /* release the working reference to the output device */ - in_dev_put(out_dev); return err; } -- cgit v1.1 From faa9dcf793beba05f7178b63a59eaa3ca5175b6a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 04:09:10 +0000 Subject: arp: RCU changes Avoid two atomic ops in arp_fwd_proxy() Avoid two atomic ops in arp_process() Valid optims since arp_rcv() is run under rcu_read_lock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/arp.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f094b75..917d2d6 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -545,10 +545,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, /* place to check for proxy_arp for routes */ - if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) { + out_dev = __in_dev_get_rcu(rt->u.dst.dev); + if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); - in_dev_put(out_dev); - } + return (omi != imi && omi != -1); } @@ -741,7 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, static int arp_process(struct sk_buff *skb) { struct net_device *dev = skb->dev; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct arphdr *arp; unsigned char *arp_ptr; struct rtable *rt; @@ -890,7 +890,6 @@ static int arp_process(struct sk_buff *skb) arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); - in_dev_put(in_dev); return 0; } goto out; @@ -936,8 +935,6 @@ static int arp_process(struct sk_buff *skb) } out: - if (in_dev) - in_dev_put(in_dev); consume_skb(skb); return 0; } -- cgit v1.1 From c6d409cfd0fd41e7a0847875e4338ad648c9b96b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 20:03:40 -0700 Subject: From abbffa2aa9bd6f8df16d0d0a102af677510d8b9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 04:29:41 +0000 Subject: [PATCH 2/3] net: net/socket.c and net/compat.c cleanups cleanup patch, to match modern coding style. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/compat.c | 47 ++++++++--------- net/socket.c | 165 ++++++++++++++++++++++++++++------------------------------ 2 files changed, 102 insertions(+), 110 deletions(-) diff --git a/net/compat.c b/net/compat.c index 1cf7590..63d260e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, int tot_len; if (kern_msg->msg_namelen) { - if (mode==VERIFY_READ) { + if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, kern_address); @@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, static int do_set_sock_timeout(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - struct compat_timeval __user *up = (struct compat_timeval __user *) optval; + struct compat_timeval __user *up = (struct compat_timeval __user *)optval; struct timeval ktime; mm_segment_t old_fs; int err; @@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level, return -EFAULT; old_fs = get_fs(); set_fs(KERNEL_DS); - err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); + err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime)); set_fs(old_fs); return err; @@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_setsockopt(sock,level,optname); + if (sock) { + err = security_socket_setsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname, int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct compat_timeval __user *ctv = - (struct compat_timeval __user*) userstamp; + (struct compat_timeval __user *) userstamp; int err = -ENOENT; struct timeval tv; @@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp); int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) { struct compat_timespec __user *ctv = - (struct compat_timespec __user*) userstamp; + (struct compat_timespec __user *) userstamp; int err = -ENOENT; struct timespec ts; @@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_getsockopt(sock, level, - optname); + if (sock) { + err = security_socket_getsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -557,7 +554,7 @@ struct compat_group_filter { int compat_mc_setsockopt(struct sock *sock, int level, int optname, char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int)) + int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) { char __user *koptval = optval; int koptlen = optlen; @@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, } return setsockopt(sock, level, optname, koptval, koptlen); } - EXPORT_SYMBOL(compat_mc_setsockopt); int compat_mc_getsockopt(struct sock *sock, int level, int optname, char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) + int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) { struct compat_group_filter __user *gf32 = (void *)optval; struct group_filter __user *kgf; @@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, __put_user(interface, &kgf->gf_interface) || __put_user(fmode, &kgf->gf_fmode) || __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) + copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) return -EFAULT; err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); @@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, copylen = numsrc * sizeof(gf32->gf_slist[0]); if (copylen > klen) copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) + if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) return -EFAULT; } return err; } - EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5)}; +static unsigned char nas[20] = { + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) +}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: - ret = sys_shutdown(a0,a1); + ret = sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = compat_sys_setsockopt(a0, a1, a[2], diff --git a/net/socket.c b/net/socket.c index 367d547..b63c051 100644 --- a/net/socket.c +++ b/net/socket.c @@ -124,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* @@ -162,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly; * Statistics counters of the socket lists */ -static DEFINE_PER_CPU(int, sockets_in_use) = 0; +static DEFINE_PER_CPU(int, sockets_in_use); /* * Support routines. @@ -309,9 +309,9 @@ static int init_inodecache(void) } static const struct super_operations sockfs_ops = { - .alloc_inode = sock_alloc_inode, - .destroy_inode =sock_destroy_inode, - .statfs = simple_statfs, + .alloc_inode = sock_alloc_inode, + .destroy_inode = sock_destroy_inode, + .statfs = simple_statfs, }; static int sockfs_get_sb(struct file_system_type *fs_type, @@ -411,6 +411,7 @@ int sock_map_fd(struct socket *sock, int flags) return fd; } +EXPORT_SYMBOL(sock_map_fd); static struct socket *sock_from_file(struct file *file, int *err) { @@ -422,7 +423,7 @@ static struct socket *sock_from_file(struct file *file, int *err) } /** - * sockfd_lookup - Go from a file number to its socket slot + * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle * @err: pointer to an error code return * @@ -450,6 +451,7 @@ struct socket *sockfd_lookup(int fd, int *err) fput(file); return sock; } +EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { @@ -540,6 +542,7 @@ void sock_release(struct socket *sock) } sock->file = NULL; } +EXPORT_SYMBOL(sock_release); int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, union skb_shared_tx *shtx) @@ -586,6 +589,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) @@ -604,6 +608,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_sendmsg); static int ktime2ts(ktime_t kt, struct timespec *ts) { @@ -664,7 +669,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); } - EXPORT_SYMBOL_GPL(__sock_recv_timestamp); inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -720,6 +724,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_recvmsg); static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -752,6 +757,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_recvmsg); static void sock_aio_dtor(struct kiocb *iocb) { @@ -774,7 +780,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct socket *sock = file->private_data; @@ -887,7 +893,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { @@ -895,7 +901,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } - EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); @@ -907,7 +912,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } - EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); @@ -919,7 +923,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } - EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, @@ -1047,6 +1050,7 @@ out_release: sock = NULL; goto out; } +EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) @@ -1147,6 +1151,7 @@ call_kill: rcu_read_unlock(); return 0; } +EXPORT_SYMBOL(sock_wake_async); static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) @@ -1265,11 +1270,13 @@ int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } +EXPORT_SYMBOL(sock_create); int sock_create_kern(int family, int type, int protocol, struct socket **res) { return __sock_create(&init_net, family, type, protocol, res, 1); } +EXPORT_SYMBOL(sock_create_kern); SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { @@ -1474,7 +1481,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + newsock = sock_alloc(); + if (!newsock) goto out_put; newsock->type = sock->type; @@ -1861,8 +1869,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1964,8 +1971,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; err = -EMSGSIZE; @@ -2191,10 +2197,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) static const unsigned char nargs[20] = { - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5) + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) }; #undef AL @@ -2340,6 +2346,7 @@ int sock_register(const struct net_proto_family *ops) printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } +EXPORT_SYMBOL(sock_register); /** * sock_unregister - remove a protocol handler @@ -2366,6 +2373,7 @@ void sock_unregister(int family) printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } +EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { @@ -2490,13 +2498,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifc.ifc_req = NULL; uifc = compat_alloc_user_space(sizeof(struct ifconf)); } else { - size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * - sizeof (struct ifreq); + size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * + sizeof(struct ifreq); uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); ifc.ifc_len = len; ifr = ifc.ifc_req = (void __user *)(uifc + 1); ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { + for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) return -EFAULT; ifr++; @@ -2516,9 +2524,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifr = ifc.ifc_req; ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0, j = 0; - i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) + i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { + if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) return -EFAULT; ifr32++; ifr++; @@ -2567,7 +2575,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2601,9 +2609,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, &kifr); - set_fs (old_fs); + set_fs(old_fs); return err; case SIOCBONDSLAVEINFOQUERY: @@ -2710,9 +2718,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, (void __user *)&ifr); - set_fs (old_fs); + set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -2734,7 +2742,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2750,20 +2758,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif } struct rtentry32 { - u32 rt_pad1; + u32 rt_pad1; struct sockaddr rt_dst; /* target address */ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! */ /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ unsigned short rt_irtt; /* Initial RTT */ }; @@ -2793,29 +2801,29 @@ static int routing_ioctl(struct net *net, struct socket *sock, if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), + ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; - ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), + ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); - ret |= __get_user (r4.rt_window, &(ur4->rt_window)); - ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user (rtdev, &(ur4->rt_dev)); + ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= __get_user(r4.rt_window, &(ur4->rt_window)); + ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { - ret |= copy_from_user (devname, compat_ptr(rtdev), 15); + ret |= copy_from_user(devname, compat_ptr(rtdev), 15); r4.rt_dev = devname; devname[15] = 0; } else r4.rt_dev = NULL; @@ -2828,9 +2836,9 @@ static int routing_ioctl(struct net *net, struct socket *sock, goto out; } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs (old_fs); + set_fs(old_fs); out: return ret; @@ -2993,11 +3001,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } +EXPORT_SYMBOL(kernel_bind); int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } +EXPORT_SYMBOL(kernel_listen); int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { @@ -3022,24 +3032,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) done: return err; } +EXPORT_SYMBOL(kernel_accept); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { return sock->ops->connect(sock, addr, addrlen, flags); } +EXPORT_SYMBOL(kernel_connect); int kernel_getsockname(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 0); } +EXPORT_SYMBOL(kernel_getsockname); int kernel_getpeername(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 1); } +EXPORT_SYMBOL(kernel_getpeername); int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) @@ -3056,6 +3070,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_getsockopt); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) @@ -3072,6 +3087,7 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) @@ -3083,6 +3099,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(kernel_sendpage); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { @@ -3095,33 +3112,11 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) return err; } +EXPORT_SYMBOL(kernel_sock_ioctl); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } - -EXPORT_SYMBOL(sock_create); -EXPORT_SYMBOL(sock_create_kern); -EXPORT_SYMBOL(sock_create_lite); -EXPORT_SYMBOL(sock_map_fd); -EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sock_register); -EXPORT_SYMBOL(sock_release); -EXPORT_SYMBOL(sock_sendmsg); -EXPORT_SYMBOL(sock_unregister); -EXPORT_SYMBOL(sock_wake_async); -EXPORT_SYMBOL(sockfd_lookup); -EXPORT_SYMBOL(kernel_sendmsg); -EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); -EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); EXPORT_SYMBOL(kernel_sock_shutdown); + -- 1.7.0.4 --- net/compat.c | 47 ++++++++--------- net/socket.c | 164 ++++++++++++++++++++++++++++------------------------------- 2 files changed, 101 insertions(+), 110 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 1cf7590..63d260e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, int tot_len; if (kern_msg->msg_namelen) { - if (mode==VERIFY_READ) { + if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, kern_address); @@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, static int do_set_sock_timeout(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - struct compat_timeval __user *up = (struct compat_timeval __user *) optval; + struct compat_timeval __user *up = (struct compat_timeval __user *)optval; struct timeval ktime; mm_segment_t old_fs; int err; @@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level, return -EFAULT; old_fs = get_fs(); set_fs(KERNEL_DS); - err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); + err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime)); set_fs(old_fs); return err; @@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_setsockopt(sock,level,optname); + if (sock) { + err = security_socket_setsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname, int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct compat_timeval __user *ctv = - (struct compat_timeval __user*) userstamp; + (struct compat_timeval __user *) userstamp; int err = -ENOENT; struct timeval tv; @@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp); int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) { struct compat_timespec __user *ctv = - (struct compat_timespec __user*) userstamp; + (struct compat_timespec __user *) userstamp; int err = -ENOENT; struct timespec ts; @@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_getsockopt(sock, level, - optname); + if (sock) { + err = security_socket_getsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -557,7 +554,7 @@ struct compat_group_filter { int compat_mc_setsockopt(struct sock *sock, int level, int optname, char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int)) + int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) { char __user *koptval = optval; int koptlen = optlen; @@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, } return setsockopt(sock, level, optname, koptval, koptlen); } - EXPORT_SYMBOL(compat_mc_setsockopt); int compat_mc_getsockopt(struct sock *sock, int level, int optname, char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) + int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) { struct compat_group_filter __user *gf32 = (void *)optval; struct group_filter __user *kgf; @@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, __put_user(interface, &kgf->gf_interface) || __put_user(fmode, &kgf->gf_fmode) || __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) + copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) return -EFAULT; err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); @@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, copylen = numsrc * sizeof(gf32->gf_slist[0]); if (copylen > klen) copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) + if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) return -EFAULT; } return err; } - EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5)}; +static unsigned char nas[20] = { + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) +}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: - ret = sys_shutdown(a0,a1); + ret = sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = compat_sys_setsockopt(a0, a1, a[2], diff --git a/net/socket.c b/net/socket.c index 367d547..acfa173 100644 --- a/net/socket.c +++ b/net/socket.c @@ -124,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* @@ -162,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly; * Statistics counters of the socket lists */ -static DEFINE_PER_CPU(int, sockets_in_use) = 0; +static DEFINE_PER_CPU(int, sockets_in_use); /* * Support routines. @@ -309,9 +309,9 @@ static int init_inodecache(void) } static const struct super_operations sockfs_ops = { - .alloc_inode = sock_alloc_inode, - .destroy_inode =sock_destroy_inode, - .statfs = simple_statfs, + .alloc_inode = sock_alloc_inode, + .destroy_inode = sock_destroy_inode, + .statfs = simple_statfs, }; static int sockfs_get_sb(struct file_system_type *fs_type, @@ -411,6 +411,7 @@ int sock_map_fd(struct socket *sock, int flags) return fd; } +EXPORT_SYMBOL(sock_map_fd); static struct socket *sock_from_file(struct file *file, int *err) { @@ -422,7 +423,7 @@ static struct socket *sock_from_file(struct file *file, int *err) } /** - * sockfd_lookup - Go from a file number to its socket slot + * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle * @err: pointer to an error code return * @@ -450,6 +451,7 @@ struct socket *sockfd_lookup(int fd, int *err) fput(file); return sock; } +EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { @@ -540,6 +542,7 @@ void sock_release(struct socket *sock) } sock->file = NULL; } +EXPORT_SYMBOL(sock_release); int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, union skb_shared_tx *shtx) @@ -586,6 +589,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) @@ -604,6 +608,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_sendmsg); static int ktime2ts(ktime_t kt, struct timespec *ts) { @@ -664,7 +669,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); } - EXPORT_SYMBOL_GPL(__sock_recv_timestamp); inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -720,6 +724,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_recvmsg); static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -752,6 +757,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_recvmsg); static void sock_aio_dtor(struct kiocb *iocb) { @@ -774,7 +780,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct socket *sock = file->private_data; @@ -887,7 +893,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { @@ -895,7 +901,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } - EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); @@ -907,7 +912,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } - EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); @@ -919,7 +923,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } - EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, @@ -1047,6 +1050,7 @@ out_release: sock = NULL; goto out; } +EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) @@ -1147,6 +1151,7 @@ call_kill: rcu_read_unlock(); return 0; } +EXPORT_SYMBOL(sock_wake_async); static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) @@ -1265,11 +1270,13 @@ int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } +EXPORT_SYMBOL(sock_create); int sock_create_kern(int family, int type, int protocol, struct socket **res) { return __sock_create(&init_net, family, type, protocol, res, 1); } +EXPORT_SYMBOL(sock_create_kern); SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { @@ -1474,7 +1481,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + newsock = sock_alloc(); + if (!newsock) goto out_put; newsock->type = sock->type; @@ -1861,8 +1869,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1964,8 +1971,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; err = -EMSGSIZE; @@ -2191,10 +2197,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) static const unsigned char nargs[20] = { - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5) + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) }; #undef AL @@ -2340,6 +2346,7 @@ int sock_register(const struct net_proto_family *ops) printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } +EXPORT_SYMBOL(sock_register); /** * sock_unregister - remove a protocol handler @@ -2366,6 +2373,7 @@ void sock_unregister(int family) printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } +EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { @@ -2490,13 +2498,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifc.ifc_req = NULL; uifc = compat_alloc_user_space(sizeof(struct ifconf)); } else { - size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * - sizeof (struct ifreq); + size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * + sizeof(struct ifreq); uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); ifc.ifc_len = len; ifr = ifc.ifc_req = (void __user *)(uifc + 1); ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { + for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) return -EFAULT; ifr++; @@ -2516,9 +2524,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifr = ifc.ifc_req; ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0, j = 0; - i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) + i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { + if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) return -EFAULT; ifr32++; ifr++; @@ -2567,7 +2575,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2601,9 +2609,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, &kifr); - set_fs (old_fs); + set_fs(old_fs); return err; case SIOCBONDSLAVEINFOQUERY: @@ -2710,9 +2718,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, (void __user *)&ifr); - set_fs (old_fs); + set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -2734,7 +2742,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2750,20 +2758,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif } struct rtentry32 { - u32 rt_pad1; + u32 rt_pad1; struct sockaddr rt_dst; /* target address */ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! */ /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ unsigned short rt_irtt; /* Initial RTT */ }; @@ -2793,29 +2801,29 @@ static int routing_ioctl(struct net *net, struct socket *sock, if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), + ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; - ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), + ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); - ret |= __get_user (r4.rt_window, &(ur4->rt_window)); - ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user (rtdev, &(ur4->rt_dev)); + ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= __get_user(r4.rt_window, &(ur4->rt_window)); + ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { - ret |= copy_from_user (devname, compat_ptr(rtdev), 15); + ret |= copy_from_user(devname, compat_ptr(rtdev), 15); r4.rt_dev = devname; devname[15] = 0; } else r4.rt_dev = NULL; @@ -2828,9 +2836,9 @@ static int routing_ioctl(struct net *net, struct socket *sock, goto out; } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs (old_fs); + set_fs(old_fs); out: return ret; @@ -2993,11 +3001,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } +EXPORT_SYMBOL(kernel_bind); int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } +EXPORT_SYMBOL(kernel_listen); int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { @@ -3022,24 +3032,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) done: return err; } +EXPORT_SYMBOL(kernel_accept); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { return sock->ops->connect(sock, addr, addrlen, flags); } +EXPORT_SYMBOL(kernel_connect); int kernel_getsockname(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 0); } +EXPORT_SYMBOL(kernel_getsockname); int kernel_getpeername(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 1); } +EXPORT_SYMBOL(kernel_getpeername); int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) @@ -3056,6 +3070,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_getsockopt); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) @@ -3072,6 +3087,7 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) @@ -3083,6 +3099,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(kernel_sendpage); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { @@ -3095,33 +3112,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) return err; } +EXPORT_SYMBOL(kernel_sock_ioctl); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } - -EXPORT_SYMBOL(sock_create); -EXPORT_SYMBOL(sock_create_kern); -EXPORT_SYMBOL(sock_create_lite); -EXPORT_SYMBOL(sock_map_fd); -EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sock_register); -EXPORT_SYMBOL(sock_release); -EXPORT_SYMBOL(sock_sendmsg); -EXPORT_SYMBOL(sock_unregister); -EXPORT_SYMBOL(sock_wake_async); -EXPORT_SYMBOL(sockfd_lookup); -EXPORT_SYMBOL(kernel_sendmsg); -EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); -EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); EXPORT_SYMBOL(kernel_sock_shutdown); -- cgit v1.1 From 26b36cfefaf2be98b225e3c1a399edb0daf52ddd Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Fri, 4 Jun 2010 14:25:44 -0400 Subject: mac80211: make ARP filtering depend on CONFIG_INET Signed-off-by: John W. Linville --- net/mac80211/main.c | 6 ++++++ net/mac80211/mlme.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 045ead9..5706156 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -329,6 +329,7 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) mutex_unlock(&local->iflist_mtx); } +#ifdef CONFIG_INET int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata) { struct in_device *idev; @@ -380,6 +381,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; } +#endif struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) @@ -669,10 +671,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_pm_qos; } +#ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; result = register_inetaddr_notifier(&local->ifa_notifier); if (result) goto fail_ifa; +#endif return 0; @@ -707,7 +711,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); +#ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); +#endif rtnl_lock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7e72013..3623bb7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2078,6 +2078,7 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; +#ifdef CONFIG_INET } else { mutex_unlock(&wk->sdata->u.mgd.mtx); @@ -2088,6 +2089,7 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, rtnl_lock(); ieee80211_set_arp_filter(wk->sdata); rtnl_unlock(); +#endif } } -- cgit v1.1 From 38a6cc7538d3c44b76f9dcea607a171adcc0208e Mon Sep 17 00:00:00 2001 From: Sujith Date: Wed, 19 May 2010 11:32:30 +0530 Subject: mac80211: Remove deprecated sta_notify commands STA_NOTIFY_ADD and STA_NOTIFY_REMOVE have no users anymore, and station addition/removal are indicated to drivers using sta_add() and sta_remove(), which can sleep. Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 978850e..d1139e4 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -269,9 +269,6 @@ static inline int drv_sta_add(struct ieee80211_local *local, if (local->ops->sta_add) ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_ADD, sta); trace_drv_sta_add(local, sdata, sta, ret); @@ -286,9 +283,6 @@ static inline void drv_sta_remove(struct ieee80211_local *local, if (local->ops->sta_remove) local->ops->sta_remove(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_REMOVE, sta); trace_drv_sta_remove(local, sdata, sta); } -- cgit v1.1 From 8b9a4e6e442756f670ef507f09bbc6c11dc0fca6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 May 2010 15:22:58 +0200 Subject: mac80211: process station blockack action frames from work Processing an association response could take a bit of time while we set up the hardware etc. During that time, the AP might already send a blockack request. If this happens very quickly on a fairly slow machine, we can end up processing the blockack request before the association processing has finished. Since the blockack processing cannot sleep right now, we also cannot make it wait in the driver. As a result, sometimes on slow machines the iwlagn driver gets totally confused, and no traffic can pass when the aggregation setup was done before the assoc setup completed. I'm working on a proper fix for this, which involves queuing all blockack category action frames from a work struct, and also allowing the ampdu_action driver callback to sleep, which will generally clean up the code and make things easier. However, this is a very involved and complex change. To fix the problem at hand in a way that can also be backported to stable, I've come up with this patch. Here, I simply process all aggregation action frames from the managed interface skb queue, which means their processing will be serialized with processing the association response, thereby fixing the problem. Signed-off-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 52 +++++++++++++++++++++++++++++++++++++++++++++------- net/mac80211/rx.c | 3 +++ 2 files changed, 48 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0839c4e..3310e70 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1692,14 +1692,52 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: - if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_BACK: { + struct ieee80211_local *local = sdata->local; + int len = skb->len; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (!sta) { + rcu_read_unlock(); + break; + } + + local_bh_disable(); + + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_req))) + break; + ieee80211_process_addba_request(local, sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_resp))) + break; + ieee80211_process_addba_resp(local, sta, mgmt, len); + break; + case WLAN_ACTION_DELBA: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.delba))) + break; + ieee80211_process_delba(sdata, sta, mgmt, len); + break; + } + local_bh_enable(); + rcu_read_unlock(); break; - - ieee80211_sta_process_chanswitch(sdata, - &mgmt->u.action.u.chan_switch.sw_elem, - (void *)ifmgd->associated->priv, - rx_status->mactime); - break; + } + case WLAN_CATEGORY_SPECTRUM_MGMT: + ieee80211_sta_process_chanswitch(sdata, + &mgmt->u.action.u.chan_switch.sw_elem, + (void *)ifmgd->associated->priv, + rx_status->mactime); + break; + } } mutex_unlock(&ifmgd->mtx); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5e0b654..be9abc2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1944,6 +1944,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + return ieee80211_sta_rx_mgmt(sdata, rx->skb); + switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + -- cgit v1.1 From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 4 Jun 2010 01:57:38 +0000 Subject: net: check for refcount if pop a stacked dst_entry xfrm triggers a warning if dst_pop() drops a refcount on a noref dst. This patch changes dst_pop() to skb_dst_pop(). skb_dst_pop() drops the refcnt only on a refcounted dst. Also we don't clone the child dst_entry, so it is not refcounted and we can use skb_dst_set_noref() in xfrm_output_one(). Signed-off-by: Steffen Klassert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 6a32915..a3cca0a 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -95,13 +95,13 @@ resume: goto error_nolock; } - dst = dst_pop(dst); + dst = skb_dst_pop(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst); + skb_dst_set_noref(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); -- cgit v1.1 From 57f1553ee5d9f093660cc49098f494e17ed11668 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:42:30 +0000 Subject: syncookies: remove Kconfig text line about disabled-by-default syncookies default to on since e994b7c901ded7200b525a707c6da71f2cf6d4bb (tcp: Don't make syn cookies initial setting depend on CONFIG_SYSCTL). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8e3a1fd..7c3a7d1 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -303,7 +303,7 @@ config ARPD If unsure, say N. config SYN_COOKIES - bool "IP: TCP syncookie support (disabled per default)" + bool "IP: TCP syncookie support" ---help--- Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote @@ -328,13 +328,13 @@ config SYN_COOKIES server is really overloaded. If this happens frequently better turn them off. - If you say Y here, note that SYN cookies aren't enabled by default; - you can enable them by saying Y to "/proc file system support" and + If you say Y here, you can disable SYN cookies at run time by + saying Y to "/proc file system support" and "Sysctl support" below and executing the command - echo 1 >/proc/sys/net/ipv4/tcp_syncookies + echo 0 > /proc/sys/net/ipv4/tcp_syncookies - at boot time after the /proc file system has been mounted. + after the /proc file system has been mounted. If unsure, say N. -- cgit v1.1 From ca55158c6ecb7832a6ad80ac44a14d23bab8cdfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 09:03:58 +0000 Subject: rps: tcp: fix rps_sock_flow_table table updates I believe a moderate SYN flood attack can corrupt RFS flow table (rps_sock_flow_table), making RPS/RFS much less effective. Even in a normal situation, server handling short lived sessions suffer from bad steering for the first data packet of a session, if another SYN packet is received for another session. We do following action in tcp_v4_rcv() : sock_rps_save_rxhash(sk, skb->rxhash); We should _not_ do this if sk is a LISTEN socket, as about each packet received on a LISTEN socket has a different rxhash than previous one. -> RPS_NO_CPU markers are spread all over rps_sock_flow_table. Also, it makes sense to protect sk->rxhash field changes with socket lock (We currently can change it even if user thread owns the lock and might use rxhash) This patch moves sock_rps_save_rxhash() to a sock locked section, and only for non LISTEN sockets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09..fe193e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1555,6 +1555,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; @@ -1579,7 +1580,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); + TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { @@ -1672,8 +1675,6 @@ process: skb->dev = NULL; - sock_rps_save_rxhash(sk, skb->rxhash); - bh_lock_sock_nested(sk); ret = 0; if (!sock_owned_by_user(sk)) { -- cgit v1.1 From c44649216522cd607a4027d2ebf4a8147d3fa94c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 05:45:47 +0000 Subject: tcp: use correct net ns in cookie_v4_check() Its better to make a route lookup in appropriate namespace. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4..9f6b222 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -347,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { .sport = th->dest, .dport = th->source } } }; security_req_classify_flow(req, &fl); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); goto out; } -- cgit v1.1 From 2a1d4bd46047efff513600d7ff422bc344f540a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:43:12 +0000 Subject: syncookies: make v4/v6 synflood warning behaviour the same both syn_flood_warning functions print a message, but ipv4 version only prints a warning if CONFIG_SYN_COOKIES=y. Make the v4 one behave like the v6 one. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 202cf09..a13f881 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -793,19 +793,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -#ifdef CONFIG_SYN_COOKIES -static void syn_flood_warning(struct sk_buff *skb) +static void syn_flood_warning(const struct sk_buff *skb) { - static unsigned long warntime; + const char *msg; - if (time_after(jiffies, (warntime + HZ * 60))) { - warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", - ntohs(tcp_hdr(skb)->dest)); - } -} +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + msg = "Sending cookies"; + else #endif + msg = "Dropping request"; + + pr_info("TCP: Possible SYN flooding on port %d. %s.\n", + ntohs(tcp_hdr(skb)->dest), msg); +} /* * Save and compile IPv4 options into the request_sock if needed. @@ -1243,6 +1244,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { + if (net_ratelimit()) + syn_flood_warning(skb); #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; @@ -1328,7 +1331,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (want_cookie) { #ifdef CONFIG_SYN_COOKIES - syn_flood_warning(skb); req->cookie_ts = tmp_opt.tstamp_ok; #endif isn = cookie_v4_init_sequence(sk, skb, &req->mss); -- cgit v1.1 From af9b4738574b46025de7ccbe75c7b24fd8914379 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:43:44 +0000 Subject: syncookies: avoid unneeded tcp header flag double check caller: if (!th->rst && !th->syn && th->ack) callee: if (!th->ack) make the caller only check for !syn (common for 3whs), and move the !rst / ack test to the callee. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c24db4..c9dac86 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -266,7 +266,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct rtable *rt; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a13f881..6558dfd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1506,7 +1506,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); #endif return sk; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 1238370..9fcb3ec 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -174,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e487080..5887141 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1157,7 +1157,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v6_check(sk, skb); #endif return sk; -- cgit v1.1 From 5918e2fb9035b35164002c3a268feaf70b7c04d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Jun 2010 00:43:57 +0000 Subject: syncookies: update mss tables - ipv6 msstab: account for ipv6 header size - ipv4 msstab: add mss for Jumbograms. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 38 +++++++++++++++++++------------------- net/ipv6/syncookies.c | 39 +++++++++++++++------------------------ 2 files changed, 34 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c9dac86..a7cbcc4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -138,23 +138,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, } /* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + * MSS Values are taken from the 2009 paper + * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: + * - values 1440 to 1460 accounted for 80% of observed mss values + * - values outside the 536-1460 range are rare (<0.2%). + * + * Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1024, + 1440, + 1460, + 4312, + 8960, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * Generate a syncookie. mssp points to the mss, which is returned @@ -169,10 +169,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - /* XXX sort msstab[] by probability? Binary search? */ - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -202,7 +202,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9fcb3ec..70d330f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -27,28 +27,17 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. - * - * Taken directly from ipv4 implementation. - * Should this list be modified for ipv6 use or is it close enough? - * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart - */ +/* Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1280 - 60, + 1480 - 60, + 1500 - 60, + 4460 - 60, + 9000 - 60, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * This (misnamed) value is the age of syncookie which is permitted. @@ -134,9 +123,11 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -154,7 +145,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) th->source, th->dest, seq, jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) -- cgit v1.1 From b78462ebc6a4ef9074aa80abebcdd470dc5f0ce0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Jun 2010 12:24:37 +0000 Subject: skbuff: add check for non-linear to warn_if_lro and needs_linearize We can avoid an unecessary cache miss by checking if the skb is non-linear before accessing gso_size/gso_type in skb_warn_if_lro, the same can also be done to avoid a cache miss on nr_frags if data_len is 0. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ec01a59..3abb3a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2103,9 +2103,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { - return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || - illegal_highdma(dev, skb))); + return skb_is_nonlinear(skb) && + ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || + illegal_highdma(dev, skb)))); } /** -- cgit v1.1 From 72e09ad107e78d69ff4d3b97a69f0aad2b77280f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 Jun 2010 03:03:30 -0700 Subject: ipv6: avoid high order allocations With mtu=9000, mld_newpack() use order-2 GFP_ATOMIC allocations, that are very unreliable, on machines where PAGE_SIZE=4K Limit allocated skbs to be at most one page. (order-0 allocations) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 59f1881..ab1622d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1356,7 +1356,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); + size += LL_ALLOCATED_SPACE(dev); + /* limit our allocations to order-0 page */ + size = min_t(int, size, SKB_MAX_ORDER(0, 0)); + skb = sock_alloc_send_skb(sk, size, 1, &err); if (!skb) return NULL; -- cgit v1.1 From 8ffb335e8d696affc04f963bf73ce2196f80edb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 15:34:40 -0700 Subject: ip6mr: fix a typo in ip6mr_for_each_table() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 073071f..89c0b07 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -120,7 +120,7 @@ static void mroute_clean_tables(struct mr6_table *mrt); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES -#define ip6mr_for_each_table(mrt, met) \ +#define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) -- cgit v1.1 From a8b690f98baf9fb1902b8eeab801351ea603fa3a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 7 Jun 2010 00:43:42 -0700 Subject: tcp: Fix slowness in read /proc/net/tcp This patch address a serious performance issue in reading the TCP sockets table (/proc/net/tcp). Reading the full table is done by a number of sequential read operations. At each read operation, a seek is done to find the last socket that was previously read. This seek operation requires that the sockets in the table need to be counted up to the current file position, and to count each of these requires taking a lock for each non-empty bucket. The whole algorithm is O(n^2). The fix is to cache the last bucket value, offset within the bucket, and the file position returned by the last read operation. On the next sequential read, the bucket and offset are used to find the last read socket immediately without needing ot scan the previous buckets the table. This algorithm t read the whole table is O(n). The improvement offered by this patch is easily show by performing cat'ing /proc/net/tcp on a machine with a lot of connections. With about 182K connections in the table, I see the following: - Without patch time cat /proc/net/tcp > /dev/null real 1m56.729s user 0m0.214s sys 1m56.344s - With patch time cat /proc/net/tcp > /dev/null real 0m0.894s user 0m0.290s sys 0m0.594s Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index acdc4c9..7f976af 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1980,6 +1980,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } +/* + * Get next listener socket follow cur. If cur is NULL, get first socket + * starting from bucket given in st->bucket; when st->bucket is zero the + * very first socket in the hash table is returned. + */ static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; @@ -1990,14 +1995,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); if (!sk) { - st->bucket = 0; - ilb = &tcp_hashinfo.listening_hash[0]; + ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); sk = sk_nulls_head(&ilb->head); + st->offset = 0; goto get_sk; } ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; @@ -2012,6 +2018,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } + st->offset = 0; if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: @@ -2047,6 +2054,7 @@ start_req: read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); + st->offset = 0; if (++st->bucket < INET_LHTABLE_SIZE) { ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); @@ -2060,7 +2068,12 @@ out: static void *listening_get_idx(struct seq_file *seq, loff_t *pos) { - void *rc = listening_get_next(seq, NULL); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + st->offset = 0; + rc = listening_get_next(seq, NULL); while (rc && *pos) { rc = listening_get_next(seq, rc); @@ -2075,13 +2088,18 @@ static inline int empty_bucket(struct tcp_iter_state *st) hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } +/* + * Get first established socket starting from bucket given in st->bucket. + * If st->bucket is zero, the very first socket in the hash is returned. + */ static void *established_get_first(struct seq_file *seq) { struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; - for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { + st->offset = 0; + for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; @@ -2126,6 +2144,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_TIME_WAIT) { tw = cur; @@ -2142,6 +2161,7 @@ get_tw: st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ + st->offset = 0; while (++st->bucket <= tcp_hashinfo.ehash_mask && empty_bucket(st)) ; @@ -2169,7 +2189,11 @@ out: static void *established_get_idx(struct seq_file *seq, loff_t pos) { - void *rc = established_get_first(seq); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + rc = established_get_first(seq); while (rc && pos) { rc = established_get_next(seq, rc); @@ -2194,24 +2218,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) return rc; } +static void *tcp_seek_last_pos(struct seq_file *seq) +{ + struct tcp_iter_state *st = seq->private; + int offset = st->offset; + int orig_num = st->num; + void *rc = NULL; + + switch (st->state) { + case TCP_SEQ_STATE_OPENREQ: + case TCP_SEQ_STATE_LISTENING: + if (st->bucket >= INET_LHTABLE_SIZE) + break; + st->state = TCP_SEQ_STATE_LISTENING; + rc = listening_get_next(seq, NULL); + while (offset-- && rc) + rc = listening_get_next(seq, rc); + if (rc) + break; + st->bucket = 0; + /* Fallthrough */ + case TCP_SEQ_STATE_ESTABLISHED: + case TCP_SEQ_STATE_TIME_WAIT: + st->state = TCP_SEQ_STATE_ESTABLISHED; + if (st->bucket > tcp_hashinfo.ehash_mask) + break; + rc = established_get_first(seq); + while (offset-- && rc) + rc = established_get_next(seq, rc); + } + + st->num = orig_num; + + return rc; +} + static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) { struct tcp_iter_state *st = seq->private; + void *rc; + + if (*pos && *pos == st->last_pos) { + rc = tcp_seek_last_pos(seq); + if (rc) + goto out; + } + st->state = TCP_SEQ_STATE_LISTENING; st->num = 0; - return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + st->bucket = 0; + st->offset = 0; + rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + +out: + st->last_pos = *pos; + return rc; } static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct tcp_iter_state *st = seq->private; void *rc = NULL; - struct tcp_iter_state *st; if (v == SEQ_START_TOKEN) { rc = tcp_get_idx(seq, 0); goto out; } - st = seq->private; switch (st->state) { case TCP_SEQ_STATE_OPENREQ: @@ -2219,6 +2291,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) rc = listening_get_next(seq, v); if (!rc) { st->state = TCP_SEQ_STATE_ESTABLISHED; + st->bucket = 0; + st->offset = 0; rc = established_get_first(seq); } break; @@ -2229,6 +2303,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } out: ++*pos; + st->last_pos = *pos; return rc; } @@ -2267,6 +2342,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; + s->last_pos = 0; return 0; } -- cgit v1.1 From fe33147a58e7d1d3086bf823aabfd491d843be82 Mon Sep 17 00:00:00 2001 From: Alex Lorca Date: Mon, 7 Jun 2010 01:01:22 -0700 Subject: net-caif: Added missing lock validator constants CAIF is using "xxx-AF_MAX" strings for the lock validator. It should use its own strings. Signed-off-by: Alex Lorca Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 2cf7f9f..f9ce0db 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -156,7 +156,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { @@ -172,7 +172,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { @@ -188,7 +188,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_MAX" }; -- cgit v1.1 From 1789a640f55658d9a54c1868cc3405e4d85dbd8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Jun 2010 22:23:57 +0000 Subject: raw: avoid two atomics in xmit Avoid two atomic ops per raw_send_hdrinc() call Avoid two atomic ops per raw6_send_hdrinc() call Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 8 +++++--- net/ipv6/raw.c | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c7a163..66cc3be 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, - struct rtable *rt, + struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -323,6 +323,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, struct sk_buff *skb; unsigned int iphlen; int err; + struct rtable *rt = *rtp; if (length > rt->u.dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, @@ -341,7 +342,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->u.dst); + *rtp = NULL; skb_reset_network_header(skb); iph = ip_hdr(skb); @@ -576,7 +578,7 @@ back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, msg->msg_iov, len, - rt, msg->msg_flags); + &rt, msg->msg_flags); else { if (!ipc.addr) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 864eb8e..968b964 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -602,13 +602,14 @@ out: } static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, - struct flowi *fl, struct rt6_info *rt, + struct flowi *fl, struct dst_entry **dstp, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; + struct rt6_info *rt = (struct rt6_info *)*dstp; if (length > rt->u.dst.dev->mtu) { ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); @@ -626,7 +627,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->u.dst); + *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -886,9 +888,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, goto do_confirm; back_from_confirm: - if (inet->hdrincl) { - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); - } else { + if (inet->hdrincl) + err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); + else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, -- cgit v1.1 From f2a03367c072150c881fa23ce3d3f76b8236018f Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 4 Jun 2010 01:33:33 +0000 Subject: htb: remove two unnecessary assignments remove two unnecessary assignments we don't need to assign NULL when initialize structure objects. Signed-off-by: Changli Gao ---- net/sched/sch_htb.c | 2 -- 1 file changed, 2 deletions(-) Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0b52b8de..4be8d04 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1550,7 +1550,6 @@ static const struct Qdisc_class_ops htb_class_ops = { }; static struct Qdisc_ops htb_qdisc_ops __read_mostly = { - .next = NULL, .cl_ops = &htb_class_ops, .id = "htb", .priv_size = sizeof(struct htb_sched), @@ -1561,7 +1560,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .init = htb_init, .reset = htb_reset, .destroy = htb_destroy, - .change = NULL /* htb_change */, .dump = htb_dump, .owner = THIS_MODULE, }; -- cgit v1.1 From 9dacaf17a60101a55d456cc7b00e269d8145aa0d Mon Sep 17 00:00:00 2001 From: jamal Date: Fri, 4 Jun 2010 02:43:06 +0000 Subject: net sched: make pedit check for clones instead Now that the core path doesnt set OK to munge we detect writable skbs by looking to see if they are cloned. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 50e3d94..a0593c9 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -127,8 +127,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, int i, munged = 0; unsigned int off; - if (!(skb->tc_verd & TC_OK2MUNGE)) { - /* should we set skb->cloned? */ + if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { return p->tcf_action; } -- cgit v1.1 From 271c1dfa61bc90a57648ff96f3eb92d4b4d4f11e Mon Sep 17 00:00:00 2001 From: jamal Date: Fri, 4 Jun 2010 02:06:22 +0000 Subject: net: Remove unnecessary net action assertion The extra assertion to allow packet munging only when there are no other ptypes listening which may have worked around an old bug is unnecessary. It is sufficient to check if the skb is cloned before trampling on it. Thanks to Herbert Xu for being persistent and patient in getting this across. [Note that cloning checks and assertions are the general rule used by tc actions (documentation/networking/tc-actions-env-rules.txt)]. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/core/dev.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b65347c..c8d1277 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2663,9 +2663,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; - } else { - /* Huh? Why does turning on AF_PACKET affect this? */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); } switch (ing_filter(skb)) { -- cgit v1.1 From 035320d54758e21227987e3aae0d46e7a04f4ddc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 6 Jun 2010 23:48:40 +0000 Subject: ipmr: dont corrupt lists ipmr_rules_exit() and ip6mr_rules_exit() free a list of items, but forget to properly remove these items from list. List head is not changed and still points to freed memory. This can trigger a fault later when icmpv6_sk_exit() is called. Fix is to either reinit list, or use list_del() to properly remove items from list before freeing them. bugzilla report : https://bugzilla.kernel.org/show_bug.cgi?id=16120 Introduced by commit d1db275dd3f6e4 (ipv6: ip6mr: support multiple tables) and commit f0ad0860d01e (ipv4: ipmr: support multiple tables) Reported-by: Alex Zhavnerchik Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 856123f..757f25eb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -267,8 +267,10 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { + list_del(&mrt->list); kfree(mrt); + } fib_rules_unregister(net->ipv4.mr_rules_ops); } #else diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 89c0b07..66078da 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -254,8 +254,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; - list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) + list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { + list_del(&mrt->list); ip6mr_free_table(mrt); + } fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else -- cgit v1.1 From 35dd0509b21e4b5bab36b9eb80c8dab0322f5007 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Mon, 7 Jun 2010 16:33:49 +0200 Subject: mac80211: fix function pointer check This makes "iw wlan0 dump survey" work again with mac80211-based drivers that support it, e.g. ath5k. Signed-off-by: Holger Schurig Signed-off-by: John W. Linville --- net/mac80211/driver-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4f22713..9c1da08 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -349,7 +349,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, struct survey_info *survey) { int ret = -EOPNOTSUPP; - if (local->ops->conf_tx) + if (local->ops->get_survey) ret = local->ops->get_survey(&local->hw, idx, survey); /* trace_drv_get_survey(local, idx, survey, ret); */ return ret; -- cgit v1.1 From 11b7c60988e5fbabb4e150612931cc068559af16 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 7 Jun 2010 15:02:17 -0400 Subject: mac80211: fix lock leak w/ ARP filtering and w/o CONFIG_INET "mac80211: make ARP filtering depend on CONFIG_INET" introduced this potential locking leak. Reported-by: Juuso Oikarinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3623bb7..9420cf1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2078,10 +2078,9 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; -#ifdef CONFIG_INET } else { mutex_unlock(&wk->sdata->u.mgd.mtx); - +#ifdef CONFIG_INET /* * configure ARP filter IP addresses to the driver, * intentionally outside the mgd mutex. -- cgit v1.1 From 66018506e15bea62de4eefc3298f170b4bfcf5ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 03:12:08 +0000 Subject: ip: Router Alert RCU conversion Straightforward conversion to RCU. One rwlock becomes a spinlock, and is static. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 11 +++-------- net/ipv4/ip_sockglue.c | 23 ++++++++++++++--------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d52c9da..d274078 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -146,7 +146,7 @@ #include /* - * Process Router Attention IP option + * Process Router Attention IP option (RFC 2113) */ int ip_call_ra_chain(struct sk_buff *skb) { @@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb) struct sock *last = NULL; struct net_device *dev = skb->dev; - read_lock(&ip_ra_lock); - for (ra = ip_ra_chain; ra; ra = ra->next) { + for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report @@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb) sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), dev_net(dev))) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { - read_unlock(&ip_ra_lock); + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) return 1; - } } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb) if (last) { raw_rcv(last, skb); - read_unlock(&ip_ra_lock); return 1; } - read_unlock(&ip_ra_lock); return 0; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ce23178..08b9519 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -239,7 +239,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) sent to multicast group to reach destination designated router. */ struct ip_ra_chain *ip_ra_chain; -DEFINE_RWLOCK(ip_ra_lock); +static DEFINE_SPINLOCK(ip_ra_lock); + +static void ip_ra_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_ra_chain, rcu)); +} int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) @@ -251,35 +256,35 @@ int ip_ra_control(struct sock *sk, unsigned char on, new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - write_lock_bh(&ip_ra_lock); + spin_lock_bh(&ip_ra_lock); for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); kfree(new_ra); return -EADDRINUSE; } - *rap = ra->next; - write_unlock_bh(&ip_ra_lock); + rcu_assign_pointer(*rap, ra->next); + spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); sock_put(sk); - kfree(ra); + call_rcu(&ra->rcu, ip_ra_free_rcu); return 0; } } if (new_ra == NULL) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return -ENOBUFS; } new_ra->sk = sk; new_ra->destructor = destructor; new_ra->next = ra; - *rap = new_ra; + rcu_assign_pointer(*rap, new_ra); sock_hold(sk); - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return 0; } -- cgit v1.1 From 9a57a9d291980302b4a3184fbc47dbddac71903e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 03:17:10 +0000 Subject: igmp: avoid two atomic ops in igmp_rcv() in_dev_get() -> __in_dev_get_rcu() in a rcu protected function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 250cb5e..3294f54 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, read_unlock(&in_dev->mc_list_lock); } +/* called in rcu_read_lock() section */ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = in_dev_get(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); int len = skb->len; if (in_dev == NULL) goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) - goto drop_ref; + goto drop; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; if (__skb_checksum_complete(skb)) - goto drop_ref; + goto drop; } ih = igmp_hdr(skb); @@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 - in_dev_put(in_dev); return pim_rcv_v1(skb); #endif case IGMPV3_HOST_MEMBERSHIP_REPORT: @@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb) break; } -drop_ref: - in_dev_put(in_dev); drop: kfree_skb(skb); return 0; -- cgit v1.1 From ed7865a47d4759e85bbd7bed44bee411d868eaad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 21:49:44 -0700 Subject: ipv4: avoid two atomic ops in ip_rt_redirect() in_dev_get() -> __in_dev_get_rcu() in a rcu protected function. [ Fix build with CONFIG_IP_ROUTE_VERBOSE disabled. -DaveM ] Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7b8eacd..883b5c7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1343,11 +1343,12 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } +/* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { int i, k; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rth, **rthp; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; @@ -1383,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rthp=&rt_hash_table[hash].chain; - rcu_read_lock(); while ((rth = rcu_dereference(*rthp)) != NULL) { struct rtable *rt; @@ -1405,12 +1405,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, break; dst_hold(&rth->u.dst); - rcu_read_unlock(); rt = dst_alloc(&ipv4_dst_ops); if (rt == NULL) { ip_rt_put(rth); - in_dev_put(in_dev); return; } @@ -1463,12 +1461,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, ip_rt_put(rt); goto do_next; } - rcu_read_unlock(); do_next: ; } } - in_dev_put(in_dev); return; reject_redirect: @@ -1479,7 +1475,7 @@ reject_redirect: &old_gw, dev->name, &new_gw, &saddr, &daddr); #endif - in_dev_put(in_dev); + ; } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) -- cgit v1.1 From 6e8b11b43b0c2e901734e2cdd70c6e325a90c4ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 03:54:46 +0000 Subject: net: avoid two atomic ops in ip_rcv_options() in_dev_get() -> __in_dev_get_rcu() in a rcu protected function. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d274078..08a3b12 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -293,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb) } if (unlikely(opt->srr)) { - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); + if (in_dev) { if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) printk(KERN_INFO "source route option %pI4 -> %pI4\n", &iph->saddr, &iph->daddr); - in_dev_put(in_dev); goto drop; } - - in_dev_put(in_dev); } if (ip_options_rcv_srr(skb)) -- cgit v1.1 From bb69ae049fcc986fcd742eb90ca0d44a7a49c9f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 11:42:13 +0000 Subject: anycast: Some RCU conversions - dev_get_by_flags() changed to dev_get_by_flags_rcu() - ipv6_sock_ac_join() dont touch dev & idev refcounts - ipv6_sock_ac_drop() dont touch dev & idev refcounts - ipv6_sock_ac_close() dont touch dev & idev refcounts - ipv6_dev_ac_dec() dount touch idev refcount - ipv6_chk_acast_addr() dont touch idev refcount Signed-off-by: Eric Dumazet CC: Hideaki YOSHIFUJI Signed-off-by: David S. Miller --- net/core/dev.c | 14 +++------ net/ipv6/anycast.c | 90 +++++++++++++++++++++++++----------------------------- 2 files changed, 47 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c8d1277..6f330ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -803,35 +803,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags - find any device with given flags + * dev_get_by_flags_rcu - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. + * is not found or a pointer to the device. Must be called inside + * rcu_read_lock(), and result refcount is unchanged. */ -struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; - rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { - dev_hold(dev); ret = dev; break; } } - rcu_read_unlock(); return ret; } -EXPORT_SYMBOL(dev_get_by_flags); +EXPORT_SYMBOL(dev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b5b0705..f058fbd 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) pac->acl_next = NULL; ipv6_addr_copy(&pac->acl_addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); dst_release(&rt->u.dst); } else if (ishost) { err = -EADDRNOTAVAIL; - goto out_free_pac; + goto error; } else { /* router, no matching interface: just pick one */ - - dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags_rcu(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { err = -ENODEV; - goto out_free_pac; + goto error; } - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; - goto out_dev_put; + goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; - in6_dev_put(idev); pac->acl_ifindex = dev->ifindex; @@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto out_dev_put; + goto error; } err = ipv6_dev_ac_inc(dev, addr); - if (err) - goto out_dev_put; - - write_lock_bh(&ipv6_sk_ac_lock); - pac->acl_next = np->ipv6_ac_list; - np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); - - dev_put(dev); - - return 0; + if (!err) { + write_lock_bh(&ipv6_sk_ac_lock); + pac->acl_next = np->ipv6_ac_list; + np->ipv6_ac_list = pac; + write_unlock_bh(&ipv6_sk_ac_lock); + pac = NULL; + } -out_dev_put: - dev_put(dev); -out_free_pac: - sock_kfree_s(sk, pac, sizeof(*pac)); +error: + rcu_read_unlock(); + if (pac) + sock_kfree_s(sk, pac, sizeof(*pac)); return err; } @@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(net, pac->acl_ifindex); - if (dev) { + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - dev_put(dev); - } + rcu_read_unlock(); + sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } @@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk) write_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; + rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - if (dev) - dev_put(dev); - dev = dev_get_by_index(net, pac->acl_ifindex); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - if (dev) - dev_put(dev); + rcu_read_unlock(); } #if 0 @@ -363,33 +357,32 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) return 0; } +/* called with rcu_read_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) { - int ret; - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); + if (idev == NULL) return -ENODEV; - ret = __ipv6_dev_ac_dec(idev, addr); - in6_dev_put(idev); - return ret; + return __ipv6_dev_ac_dec(idev, addr); } /* * check if the interface has this anycast address + * called with rcu_read_lock() */ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (aca = idev->ac_list; aca; aca = aca->aca_next) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; read_unlock_bh(&idev->lock); - in6_dev_put(idev); return aca != NULL; } return 0; @@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, { int found = 0; - if (dev) - return ipv6_chk_acast_dev(dev, addr); rcu_read_lock(); - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { - found = 1; - break; - } + if (dev) + found = ipv6_chk_acast_dev(dev, addr); + else + for_each_netdev_rcu(net, dev) + if (ipv6_chk_acast_dev(dev, addr)) { + found = 1; + break; + } rcu_read_unlock(); return found; } -- cgit v1.1 From 90b726097ba0dcc1f9725182513e669a30e77db5 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Mon, 7 Jun 2010 10:52:12 +0300 Subject: mac80211: Add netif state checking to ieee80211_ifa_changed There's a window for ieee80211_ifa_changed() to get called whilst the managed mode mutex has not been initialized when opening and stopping the interface. Currently this causes a kernel BUG like the following: [ 132.460013] kernel BUG at /home/wifi/iwlwifi-2.6/net/mac80211/main.c:380! [ 132.460013] invalid opcode: 0000 [#1] SMP The mutex is initialized during open(), hence once netif_running() is true, the mutex should be valid. Fix by adding a netif_running() check to the function. Reported-by: Reinette Chatre Signed-off-by: Juuso Oikarinen Tested-by: Reinette Chatre Signed-off-by: John W. Linville --- net/mac80211/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5706156..88b671a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -359,6 +359,9 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, struct ieee80211_sub_if_data *sdata; struct ieee80211_if_managed *ifmgd; + if (!netif_running(ndev)) + return NOTIFY_DONE; + /* Make sure it's our interface that got changed */ if (!wdev) return NOTIFY_DONE; -- cgit v1.1 From b054b747a694927879c94dd11af54d04346aed7d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Jun 2010 21:50:07 +0200 Subject: mac80211: fix deauth before assoc When we receive a deauthentication frame before having successfully associated, we neither print a message nor abort assocation. The former makes it hard to debug, while the latter later causes a warning in cfg80211 when, as will typically be the case, association timed out. This warning was reported by many, e.g. in https://bugzilla.kernel.org/show_bug.cgi?id=15981, but I couldn't initially pinpoint it. I verified the fix by hacking hostapd to send a deauth frame instead of an association response. Cc: stable@kernel.org Signed-off-by: Johannes Berg Tested-by: Miles Lane Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3310e70..f803f8b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1760,9 +1760,45 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); if (skb->len >= 24 + 2 /* mgmt + deauth reason */ && - (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) { + struct ieee80211_local *local = sdata->local; + struct ieee80211_work *wk; + + mutex_lock(&local->work_mtx); + list_for_each_entry(wk, &local->work_list, list) { + if (wk->sdata != sdata) + continue; + + if (wk->type != IEEE80211_WORK_ASSOC) + continue; + + if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN)) + continue; + if (memcmp(mgmt->sa, wk->filter_ta, ETH_ALEN)) + continue; + /* + * Printing the message only here means we can't + * spuriously print it, but it also means that it + * won't be printed when the frame comes in before + * we even tried to associate or in similar cases. + * + * Ultimately, I suspect cfg80211 should print the + * messages instead. + */ + printk(KERN_DEBUG + "%s: deauthenticated from %pM (Reason: %u)\n", + sdata->name, mgmt->bssid, + le16_to_cpu(mgmt->u.deauth.reason_code)); + + list_del_rcu(&wk->list); + free_work(wk); + break; + } + mutex_unlock(&local->work_mtx); + + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + } out: kfree_skb(skb); } -- cgit v1.1 From 08c801f8d45387a1b46066aad1789a9bb9c4b645 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 8 Jun 2010 17:51:27 -0600 Subject: net: Print num_rx_queues imbalance warning only when there are allocated queues BugLink: http://bugs.launchpad.net/bugs/591416 There are a number of network drivers (bridge, bonding, etc) that are not yet receive multi-queue enabled and use alloc_netdev(), so don't print a num_rx_queues imbalance warning in that case. Also, only print the warning once for those drivers that _are_ multi-queue enabled. Signed-off-by: Tim Gardner Acked-by: Eric Dumazet --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d03470f..14a8568 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2253,11 +2253,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); if (unlikely(index >= dev->num_rx_queues)) { - if (net_ratelimit()) { - pr_warning("%s received packet on queue " - "%u, but number of RX queues is %u\n", - dev->name, index, dev->num_rx_queues); - } + WARN_ONCE(dev->num_rx_queues > 1, "%s received packet " + "on queue %u, but number of RX queues is %u\n", + dev->name, index, dev->num_rx_queues); goto done; } rxqueue = dev->_rx + index; -- cgit v1.1 From 88e7594a9775e54dcd421cb246406dce62e48bee Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 7 Jun 2010 03:27:39 +0000 Subject: phonet: use call_rcu for phonet device free Use call_rcu rather than synchronize_rcu. Signed-off-by: Jiri Pirko Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c33da65..b18e48f 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr) return err; } +static void phonet_device_rcu_free(struct rcu_head *head) +{ + struct phonet_device *pnd; + + pnd = container_of(head, struct phonet_device, rcu); + kfree(pnd); +} + int phonet_address_del(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); @@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = NULL; mutex_unlock(&pndevs->lock); - if (pnd) { - synchronize_rcu(); - kfree(pnd); - } + if (pnd) + call_rcu(&pnd->rcu, phonet_device_rcu_free); + return err; } -- cgit v1.1 From aea34e7ae7a40bc72f9f11b5658160dfb4b90c48 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 7 Jun 2010 04:51:58 +0000 Subject: caif: fix a couple range checks The extra ! character means that these conditions are always false. Signed-off-by: Dan Carpenter Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- net/caif/cfrfml.c | 2 +- net/caif/cfveil.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index cd2830f..fd27b17 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -83,7 +83,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_err("CAIF: %s():Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 0fd827f..e04f7d9 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -84,7 +84,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) return ret; caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { + if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { pr_warning("CAIF: %s(): Packet too large - size=%d\n", __func__, cfpkt_getlen(pkt)); return -EOVERFLOW; -- cgit v1.1 From cfa087f689402438e3cb0f077e649d01c871b0e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 22:34:35 +0000 Subject: icmp: RCU conversion in icmp_address_reply() - rcu_read_lock() already held by caller - use __in_dev_get_rcu() instead of in_dev_get() / in_dev_put() - remove goto out; Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d65e9215..bdb6c71 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -925,6 +925,7 @@ static void icmp_address(struct sk_buff *skb) /* * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain * loudly if an inconsistency is found. + * called with rcu_read_lock() */ static void icmp_address_reply(struct sk_buff *skb) @@ -935,12 +936,12 @@ static void icmp_address_reply(struct sk_buff *skb) struct in_ifaddr *ifa; if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) - goto out; + return; - in_dev = in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) - goto out; - rcu_read_lock(); + return; + if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { @@ -958,9 +959,6 @@ static void icmp_address_reply(struct sk_buff *skb) mp, dev->name, &rt->rt_src); } } - rcu_read_unlock(); - in_dev_put(in_dev); -out:; } static void icmp_discard(struct sk_buff *skb) -- cgit v1.1 From 96b52e61be1ad4d4f8de39b9deaf253da804ea3b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 21:05:02 +0000 Subject: ipv6: mcast: RCU conversions - ipv6_sock_mc_join() : doesnt touch dev refcount - ipv6_sock_mc_drop() : doesnt touch dev/idev refcounts - ip6_mc_find_dev() becomes ip6_mc_find_dev_rcu() (called from rcu), and doesnt touch dev/idev refcounts - ipv6_sock_mc_close() : doesnt touch dev/idev refcounts - ip6_mc_source() uses ip6_mc_find_dev_rcu() - ip6_mc_msfilter() uses ip6_mc_find_dev_rcu() - ip6_mc_msfget() uses ip6_mc_find_dev_rcu() - ipv6_dev_mc_dec(), ipv6_chk_mcast_addr(), igmp6_event_query(), igmp6_event_report(), mld_sendpack(), igmp6_send() dont touch idev refcount Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 183 ++++++++++++++++++++++++++----------------------------- 1 file changed, 87 insertions(+), 96 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 8752e80..3e36d15 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -152,18 +152,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = NULL; ipv6_addr_copy(&mc_lst->addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -180,8 +181,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - dev_put(dev); return err; } @@ -190,7 +191,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) np->ipv6_mc_list = mc_lst; write_unlock_bh(&ipv6_sk_mc_lock); - dev_put(dev); + rcu_read_unlock(); return 0; } @@ -213,18 +214,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev != NULL) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -234,43 +234,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EADDRNOTAVAIL; } -static struct inet6_dev *ip6_mc_find_dev(struct net *net, - struct in6_addr *group, - int ifindex) +/* called with rcu_read_lock() */ +static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, + struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; if (ifindex == 0) { - struct rt6_info *rt; + struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); - rt = rt6_lookup(net, group, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (!dev) - goto nodev; - idev = in6_dev_get(dev); + return NULL; + idev = __in6_dev_get(dev); if (!idev) - goto release; + return NULL;; read_lock_bh(&idev->lock); - if (idev->dead) - goto unlock_release; - + if (idev->dead) { + read_unlock_bh(&idev->lock); + return NULL; + } return idev; - -unlock_release: - read_unlock_bh(&idev->lock); - in6_dev_put(idev); -release: - dev_put(dev); -nodev: - return NULL; } void ipv6_sock_mc_close(struct sock *sk) @@ -286,19 +279,17 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); write_lock_bh(&ipv6_sk_mc_lock); @@ -327,14 +318,17 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); - if (!idev) + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface); + if (!idev) { + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = -EADDRNOTAVAIL; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; @@ -358,7 +352,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); pmclocked = 1; psl = pmc->sflist; @@ -433,11 +427,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, ip6_mc_add_src(idev, group, omode, 1, source, 1); done: if (pmclocked) - write_unlock_bh(&pmc->sflock); - read_unlock_bh(&ipv6_sk_mc_lock); + write_unlock(&pmc->sflock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -463,14 +456,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) { + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = 0; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; @@ -512,7 +508,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); psl = pmc->sflist; if (psl) { (void) ip6_mc_del_src(idev, group, pmc->sfmode, @@ -522,13 +518,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); pmc->sflist = newpsl; pmc->sfmode = gsf->gf_fmode; - write_unlock_bh(&pmc->sflock); + write_unlock(&pmc->sflock); err = 0; done: - read_unlock_bh(&ipv6_sk_mc_lock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -551,11 +546,13 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) { + rcu_read_unlock(); return -ENODEV; - + } dev = idev->dev; err = -EADDRNOTAVAIL; @@ -577,8 +574,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, psl = pmc->sflist; count = psl ? psl->sl_count : 0; read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; @@ -604,8 +600,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return 0; done: read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); return err; } @@ -822,6 +817,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) struct ifmcaddr6 *mc; struct inet6_dev *idev; + /* we need to take a reference on idev */ idev = in6_dev_get(dev); if (idev == NULL) @@ -860,7 +856,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); ipv6_addr_copy(&mc->mca_addr, addr); - mc->idev = idev; + mc->idev = idev; /* (reference taken) */ mc->mca_users = 1; /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; @@ -915,16 +911,18 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev; int err; - if (!idev) - return -ENODEV; - - err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_lock(); - in6_dev_put(idev); + idev = __in6_dev_get(dev); + if (!idev) + err = -ENODEV; + else + err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_unlock(); return err; } @@ -965,7 +963,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, struct ifmcaddr6 *mc; int rv = 0; - idev = in6_dev_get(dev); + rcu_read_lock(); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (mc = idev->mc_list; mc; mc=mc->next) { @@ -992,8 +991,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rv = 1; /* don't filter unspecified source */ } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } + rcu_read_unlock(); return rv; } @@ -1104,6 +1103,7 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, return 1; } +/* called with rcu_read_lock() */ int igmp6_event_query(struct sk_buff *skb) { struct mld2_query *mlh2 = NULL; @@ -1127,7 +1127,7 @@ int igmp6_event_query(struct sk_buff *skb) if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return 0; @@ -1137,10 +1137,8 @@ int igmp6_event_query(struct sk_buff *skb) group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && - !(group_type&IPV6_ADDR_MULTICAST)) { - in6_dev_put(idev); + !(group_type&IPV6_ADDR_MULTICAST)) return -EINVAL; - } if (len == 24) { int switchback; @@ -1161,10 +1159,9 @@ int igmp6_event_query(struct sk_buff *skb) } else if (len >= 28) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); - if (!pskb_may_pull(skb, srcs_offset)) { - in6_dev_put(idev); + if (!pskb_may_pull(skb, srcs_offset)) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; if (!max_delay) @@ -1173,28 +1170,23 @@ int igmp6_event_query(struct sk_buff *skb) if (mlh2->mld2q_qrv) idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ - if (mlh2->mld2q_nsrcs) { - in6_dev_put(idev); + if (mlh2->mld2q_nsrcs) return -EINVAL; /* no sources allowed */ - } + mld_gq_start_timer(idev); - in6_dev_put(idev); return 0; } /* mark sources to include, if group & source-specific */ if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) { - in6_dev_put(idev); + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } - } else { - in6_dev_put(idev); + } else return -EINVAL; - } read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { @@ -1227,12 +1219,11 @@ int igmp6_event_query(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } - +/* called with rcu_read_lock() */ int igmp6_event_report(struct sk_buff *skb) { struct ifmcaddr6 *ma; @@ -1260,7 +1251,7 @@ int igmp6_event_report(struct sk_buff *skb) !(addr_type&IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return -ENODEV; @@ -1280,7 +1271,6 @@ int igmp6_event_report(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } @@ -1396,12 +1386,14 @@ static void mld_sendpack(struct sk_buff *skb) struct mld2_report *pmr = (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; struct flowi fl; struct dst_entry *dst; + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); @@ -1441,8 +1433,7 @@ out: } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: @@ -1779,7 +1770,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - idev = in6_dev_get(skb->dev); + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); if (!dst) { @@ -1806,8 +1798,7 @@ out: } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: -- cgit v1.1 From 00d9d6a185de89edc0649ca4ead58f0283dfcbac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jun 2010 22:24:44 +0000 Subject: ipv6: fix ICMP6_MIB_OUTERRORS In commit 1f8438a85366 (icmp: Account for ICMP out errors), I did a typo on IPV6 side, using ICMP6_MIB_OUTMSGS instead of ICMP6_MIB_OUTERRORS Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ce79929..03e62f9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -483,7 +483,7 @@ route_done: np->tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } @@ -565,7 +565,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } -- cgit v1.1 From 597a264b1a9c7e36d1728f677c66c5c1f7e3b837 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 3 Jun 2010 09:30:11 +0000 Subject: net: deliver skbs on inactive slaves to exact matches Currently, the accelerated receive path for VLAN's will drop packets if the real device is an inactive slave and is not one of the special pkts tested for in skb_bond_should_drop(). This behavior is different then the non-accelerated path and for pkts over a bonded vlan. For example, vlanx -> bond0 -> ethx will be dropped in the vlan path and not delivered to any packet handlers at all. However, bond0 -> vlanx -> ethx and bond0 -> ethx will be delivered to handlers that match the exact dev, because the VLAN path checks the real_dev which is not a slave and netif_recv_skb() doesn't drop frames but only delivers them to exact matches. This patch adds a sk_buff flag which is used for tagging skbs that would previously been dropped and allows the skb to continue to skb_netif_recv(). Here we add logic to check for the deliver_no_wcard flag and if it is set only deliver to handlers that match exactly. This makes both paths above consistent and gives pkt handlers a way to identify skbs that come from inactive slaves. Without this patch in some configurations skbs will be delivered to handlers with exact matches and in others be dropped out right in the vlan path. I have tested the following 4 configurations in failover modes and load balancing modes. # bond0 -> ethx # vlanx -> bond0 -> ethx # bond0 -> vlanx -> ethx # bond0 -> ethx | vlanx -> -- Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 ++-- net/core/dev.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index bd537fc..50f58f5f 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -12,7 +12,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, return NET_RX_DROP; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); @@ -84,7 +84,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, struct sk_buff *p; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) - goto drop; + skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); diff --git a/net/core/dev.c b/net/core/dev.c index 14a8568..2b3bf53 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2810,13 +2810,24 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; + /* + * bonding note: skbs received on inactive slaves should only + * be delivered to pkt handlers that are exact matches. Also + * the deliver_no_wcard flag will be set. If packet handlers + * are sensitive to duplicate packets these skbs will need to + * be dropped at the handler. The vlan accel path may have + * already set the deliver_no_wcard flag. + */ null_or_orig = NULL; orig_dev = skb->dev; master = ACCESS_ONCE(orig_dev->master); - if (master) { - if (skb_bond_should_drop(skb, master)) + if (skb->deliver_no_wcard) + null_or_orig = orig_dev; + else if (master) { + if (skb_bond_should_drop(skb, master)) { + skb->deliver_no_wcard = 1; null_or_orig = orig_dev; /* deliver only exact match */ - else + } else skb->dev = master; } -- cgit v1.1 From 592fcb9dfafaa02dd0edc207bf5d3a0ee7a1f8df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 16:21:07 +0000 Subject: ip: ip_ra_control() rcu fix commit 66018506e15b (ip: Router Alert RCU conversion) introduced RCU lookups to ip_call_ra_chain(). It missed proper deinit phase : When ip_ra_control() deletes an ip_ra_chain, it should make sure ip_call_ra_chain() users can not start to use socket during the rcu grace period. It should also delay the sock_put() after the grace period, or we risk a premature socket freeing and corruptions, as raw sockets are not rcu protected yet. This delay avoids using expensive atomic_inc_not_zero() in ip_call_ra_chain(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 08b9519..47fff52 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -241,9 +241,13 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) struct ip_ra_chain *ip_ra_chain; static DEFINE_SPINLOCK(ip_ra_lock); -static void ip_ra_free_rcu(struct rcu_head *head) + +static void ip_ra_destroy_rcu(struct rcu_head *head) { - kfree(container_of(head, struct ip_ra_chain, rcu)); + struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); + + sock_put(ra->saved_sk); + kfree(ra); } int ip_ra_control(struct sock *sk, unsigned char on, @@ -264,13 +268,20 @@ int ip_ra_control(struct sock *sk, unsigned char on, kfree(new_ra); return -EADDRINUSE; } + /* dont let ip_call_ra_chain() use sk again */ + ra->sk = NULL; rcu_assign_pointer(*rap, ra->next); spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); - sock_put(sk); - call_rcu(&ra->rcu, ip_ra_free_rcu); + /* + * Delay sock_put(sk) and kfree(ra) after one rcu grace + * period. This guarantee ip_call_ra_chain() dont need + * to mess with socket refcounts. + */ + ra->saved_sk = sk; + call_rcu(&ra->rcu, ip_ra_destroy_rcu); return 0; } } -- cgit v1.1 From ae638c47dc040b8def16d05dc6acdd527628f231 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jun 2010 23:39:10 +0000 Subject: pkt_sched: gen_estimator: add a new lock gen_kill_estimator() / gen_new_estimator() is not always called with RTNL held. net/netfilter/xt_RATEEST.c is one user of these API that do not hold RTNL, so random corruptions can occur between "tc" and "iptables". Add a new fine grained lock instead of trying to use RTNL in netfilter. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index cf8e703..785e527 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock); /* Protects against soft lockup during large deletion */ static struct rb_root est_root = RB_ROOT; +static DEFINE_SPINLOCK(est_tree_lock); static void est_timer(unsigned long arg) { @@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * * Returns 0 on success or a negative error code. * - * NOTE: Called under rtnl_mutex */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, @@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; + spin_lock(&est_tree_lock); if (!elist[idx].timer.function) { INIT_LIST_HEAD(&elist[idx].list); setup_timer(&elist[idx].timer, est_timer, idx); @@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, list_add_rcu(&est->list, &elist[idx].list); gen_add_node(est); + spin_unlock(&est_tree_lock); return 0; } @@ -261,13 +263,13 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. * - * NOTE: Called under rtnl_mutex */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; + spin_lock(&est_tree_lock); while ((e = gen_find_node(bstats, rate_est))) { rb_erase(&e->node, &est_root); @@ -278,6 +280,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, list_del_rcu(&e->list); call_rcu(&e->e_rcu, __gen_kill_estimator); } + spin_unlock(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -312,8 +315,14 @@ EXPORT_SYMBOL(gen_replace_estimator); bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { + bool res; + ASSERT_RTNL(); - return gen_find_node(bstats, rate_est) != NULL; + spin_lock(&est_tree_lock); + res = gen_find_node(bstats, rate_est) != NULL; + spin_unlock(&est_tree_lock); + + return res; } EXPORT_SYMBOL(gen_estimator_active); -- cgit v1.1 From 07a0f0f07a68014c92c752a5598102372bddf46e Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Thu, 10 Jun 2010 23:08:11 -0700 Subject: pktgen: Fix accuracy of inter-packet delay. This patch correct a bug in the delay of pktgen. It makes sure the inter-packet interval is accurate. Signed-off-by: Daniel Turull Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2ad68da..1dacd7b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2170,7 +2170,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) end_time = ktime_now(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); - pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); + pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) -- cgit v1.1 From d8d1f30b95a635dbd610dcc5eb641aca8f4768cf Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Thu, 10 Jun 2010 23:31:35 -0700 Subject: net-next: remove useless union keyword remove useless union keyword in rtable, rt6_info and dn_route. Since there is only one member in a union, the union keyword isn't useful. Signed-off-by: Changli Gao Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/atm/clip.c | 2 +- net/bridge/br_device.c | 2 +- net/bridge/br_netfilter.c | 20 +- net/dccp/ipv4.c | 4 +- net/decnet/dn_route.c | 158 ++++++------ net/ethernet/eth.c | 5 +- net/ipv4/af_inet.c | 4 +- net/ipv4/arp.c | 12 +- net/ipv4/datagram.c | 2 +- net/ipv4/icmp.c | 18 +- net/ipv4/igmp.c | 10 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_forward.c | 10 +- net/ipv4/ip_gre.c | 14 +- net/ipv4/ip_input.c | 4 +- net/ipv4/ip_output.c | 60 ++--- net/ipv4/ipip.c | 8 +- net/ipv4/ipmr.c | 8 +- net/ipv4/netfilter.c | 8 +- net/ipv4/raw.c | 16 +- net/ipv4/route.c | 420 ++++++++++++++++---------------- net/ipv4/syncookies.c | 6 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 4 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/addrconf.c | 10 +- net/ipv6/anycast.c | 6 +- net/ipv6/fib6_rules.c | 10 +- net/ipv6/ip6_fib.c | 30 +-- net/ipv6/ip6_output.c | 38 +-- net/ipv6/ip6_tunnel.c | 8 +- net/ipv6/mcast.c | 4 +- net/ipv6/ndisc.c | 8 +- net/ipv6/raw.c | 12 +- net/ipv6/route.c | 300 +++++++++++------------ net/ipv6/sit.c | 8 +- net/l2tp/l2tp_ip.c | 6 +- net/netfilter/ipvs/ip_vs_xmit.c | 86 +++---- net/netfilter/nf_conntrack_h323_main.c | 12 +- net/netfilter/nf_conntrack_netbios_ns.c | 2 +- net/netfilter/xt_TCPMSS.c | 4 +- net/netfilter/xt_TEE.c | 4 +- net/rxrpc/ar-peer.c | 4 +- net/sctp/protocol.c | 4 +- 44 files changed, 674 insertions(+), 683 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index 313aba1..95fdd11 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) error = ip_route_output_key(&init_net, &rt, &fl); if (error) return error; - neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1); + neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); ip_rt_put(rt); if (!neigh) return -ENOMEM; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eedf2c9..b898364 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -127,7 +127,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) #ifdef CONFIG_BRIDGE_NETFILTER /* remember the MTU in the rtable for PMTU */ - br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; + br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu; #endif return 0; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 4442099..0685b25 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -117,12 +117,12 @@ void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.dev = br->dev; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.metrics[RTAX_MTU - 1] = 1500; - rt->u.dst.flags = DST_NOXFRM; - rt->u.dst.ops = &fake_dst_ops; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; + rt->dst.path = &rt->dst; + rt->dst.metrics[RTAX_MTU - 1] = 1500; + rt->dst.flags = DST_NOXFRM; + rt->dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) @@ -244,8 +244,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + dst_hold(&rt->dst); + skb_dst_set(skb, &rt->dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -396,8 +396,8 @@ bridged_dnat: kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + dst_hold(&rt->dst); + skb_dst_set(skb, &rt->dst); } skb->dev = nf_bridge->physindev; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d9b11ef..d4a166f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. */ - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, inet->inet_daddr, @@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return NULL; } - return &rt->u.dst; + return &rt->dst; } static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 812e6df..6585ea6 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst) static inline void dnrt_free(struct dn_route *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void dnrt_drop(struct dn_route *rt) { - dst_release(&rt->u.dst); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + dst_release(&rt->dst); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static void dn_dst_check_expire(unsigned long dummy) @@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy) spin_lock(&dn_rt_hash_table[i].lock); while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_free(rt); } spin_unlock(&dn_rt_hash_table[i].lock); @@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops) rtp = &dn_rt_hash_table[i].chain; while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_drop(rt); break; } @@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * while((rth = *rthp) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ - *rthp = rth->u.dst.dn_next; - rcu_assign_pointer(rth->u.dst.dn_next, + *rthp = rth->dst.dn_next; + rcu_assign_pointer(rth->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); dnrt_drop(rt); *rp = rth; return 0; } - rthp = &rth->u.dst.dn_next; + rthp = &rth->dst.dn_next; } - rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); + rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - dst_use(&rt->u.dst, now); + dst_use(&rt->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; @@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy) goto nothing_to_declare; for(; rt; rt=next) { - next = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + next = rt->dst.dn_next; + rt->dst.dn_next = NULL; dst_free((struct dst_entry *)rt); } @@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb) /* Ensure that we have enough space for headers */ rt = (struct dn_route *)skb_dst(skb); header_len = dn_db->use_long ? 21 : 6; - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len)) goto drop; /* @@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb) if (++cb->hops > 30) goto drop; - skb->dev = rt->u.dst.dev; + skb->dev = rt->dst.dev; /* * If packet goes out same interface it came in on, then set @@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb) static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; struct neighbour *n; unsigned mss; @@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); } rt->rt_type = res->type; - if (dev != NULL && rt->u.dst.neighbour == NULL) { + if (dev != NULL && rt->dst.neighbour == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->u.dst.neighbour = n; + rt->dst.neighbour = n; } - if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || - dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || - dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) - rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; + if (dst_metric(&rt->dst, RTAX_MTU) == 0 || + dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); + if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || + dst_metric(&rt->dst, RTAX_ADVMSS) > mss) + rt->dst.metrics[RTAX_ADVMSS-1] = mss; return 0; } @@ -1096,8 +1096,8 @@ make_route: if (rt == NULL) goto e_nobufs; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.flags = DST_HOST; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.flags = DST_HOST; rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; @@ -1113,17 +1113,17 @@ make_route: rt->rt_dst_map = fl.fld_dst; rt->rt_src_map = fl.fld_src; - rt->u.dst.dev = dev_out; + rt->dst.dev = dev_out; dev_hold(dev_out); - rt->u.dst.neighbour = neigh; + rt->dst.neighbour = neigh; neigh = NULL; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_rt_bug; + rt->dst.lastuse = jiffies; + rt->dst.output = dn_output; + rt->dst.input = dn_rt_bug; rt->rt_flags = flags; if (flags & RTCF_LOCAL) - rt->u.dst.input = dn_nsp_rx; + rt->dst.input = dn_nsp_rx; err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1152,7 +1152,7 @@ e_nobufs: err = -ENOBUFS; goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto e_nobufs; } @@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next)) { + rt = rcu_dereference_bh(rt->dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && (flp->mark == rt->fl.mark) && (rt->fl.iif == 0) && (rt->fl.oif == flp->oif)) { - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); - *pprt = &rt->u.dst; + *pprt = &rt->dst; return 0; } } @@ -1375,29 +1375,29 @@ make_route: rt->fl.iif = in_dev->ifindex; rt->fl.mark = fl.mark; - rt->u.dst.flags = DST_HOST; - rt->u.dst.neighbour = neigh; - rt->u.dst.dev = out_dev; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_rt_bug; + rt->dst.flags = DST_HOST; + rt->dst.neighbour = neigh; + rt->dst.dev = out_dev; + rt->dst.lastuse = jiffies; + rt->dst.output = dn_rt_bug; switch(res.type) { case RTN_UNICAST: - rt->u.dst.input = dn_forward; + rt->dst.input = dn_forward; break; case RTN_LOCAL: - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_nsp_rx; - rt->u.dst.dev = in_dev; + rt->dst.output = dn_output; + rt->dst.input = dn_nsp_rx; + rt->dst.dev = in_dev; flags |= RTCF_LOCAL; break; default: case RTN_UNREACHABLE: case RTN_BLACKHOLE: - rt->u.dst.input = dst_discard; + rt->dst.input = dst_discard; } rt->rt_flags = flags; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + if (rt->dst.dev) + dev_hold(rt->dst.dev); err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1405,7 +1405,7 @@ make_route: hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); dn_insert_route(rt, hash, &rt); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); done: if (neigh) @@ -1427,7 +1427,7 @@ e_nobufs: goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto done; } @@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; - rt = rcu_dereference(rt->u.dst.dn_next)) { + rt = rcu_dereference(rt->dst.dn_next)) { if ((rt->fl.fld_src == cb->src) && (rt->fl.fld_dst == cb->dst) && (rt->fl.oif == 0) && (rt->fl.mark == skb->mark) && (rt->fl.iif == cb->iif)) { - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); return 0; @@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 16; RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); } - if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + if (rt->dst.dev) + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); /* * Note to self - change this if input routes reverse direction when * they deal only with inputs and not with replies like they do @@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); if (rt->rt_daddr != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto rtattr_failure; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, - rt->u.dst.error) < 0) + expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, + rt->dst.error) < 0) goto rtattr_failure; if (rt->fl.iif) RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); @@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void local_bh_enable(); memset(cb, 0, sizeof(struct dn_skb_cb)); rt = (struct dn_route *)skb_dst(skb); - if (!err && -rt->u.dst.error) - err = rt->u.dst.error; + if (!err && -rt->dst.error) + err = rt->dst.error; } else { int oif = 0; if (rta[RTA_OIF - 1]) @@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb->dev = NULL; if (err) goto out_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { + rt = rcu_dereference_bh(rt->dst.dn_next), idx++) { if (idx < s_idx) continue; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { @@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rt->u.dst.dn_next; + rt = rt->dst.dn_next; while(!rt) { rcu_read_unlock_bh(); if (--s->bucket < 0) @@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", - rt->u.dst.dev ? rt->u.dst.dev->name : "*", + rt->dst.dev ? rt->dst.dev->name : "*", dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), - atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, - (int) dst_metric(&rt->u.dst, RTAX_RTT)); + atomic_read(&rt->dst.__refcnt), + rt->dst.__use, + (int) dst_metric(&rt->dst, RTAX_RTT)); return 0; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 61ec032..215c839 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; - unsigned char *rawp; skb->dev = dev; skb_reset_mac_header(skb); @@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - rawp = skb->data; - /* * This is a magic hack to spot IPX packets. Older Novell breaks * the protocol design and runs IPX over 802.3 without an 802.2 LLC * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. */ - if (*(unsigned short *)rawp == 0xFFFF) + if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF) return htons(ETH_P_802_3); /* diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 551ce56..d99e7e0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1100,7 +1100,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (err) return err; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); new_saddr = rt->rt_src; @@ -1166,7 +1166,7 @@ int inet_sk_rebuild_header(struct sock *sk) err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); } if (!err) - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); else { /* Routing failed... */ sk->sk_route_caps = 0; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 917d2d6..cf78f41 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -427,7 +427,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) if (ip_route_output_key(net, &rt, &fl) < 0) return 1; - if (rt->u.dst.dev != dev) { + if (rt->dst.dev != dev) { NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); flag = 1; } @@ -532,7 +532,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct in_device *out_dev; int imi, omi = -1; - if (rt->u.dst.dev == dev) + if (rt->dst.dev == dev) return 0; if (!IN_DEV_PROXY_ARP(in_dev)) @@ -545,7 +545,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, /* place to check for proxy_arp for routes */ - out_dev = __in_dev_get_rcu(rt->u.dst.dev); + out_dev = __in_dev_get_rcu(rt->dst.dev); if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); @@ -576,7 +576,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev, __be32 sip, __be32 tip) { /* Private VLAN is only concerned about the same ethernet segment */ - if (rt->u.dst.dev != dev) + if (rt->dst.dev != dev) return 0; /* Don't reply on self probes (often done by windowz boxes)*/ @@ -1042,7 +1042,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; @@ -1149,7 +1149,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index fb24658..fe3daa7 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -69,7 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); return(0); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index bdb6c71..7569b21 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -271,7 +271,7 @@ int xrlim_allow(struct dst_entry *dst, int timeout) static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, int type, int code) { - struct dst_entry *dst = &rt->u.dst; + struct dst_entry *dst = &rt->dst; int rc = 1; if (type > NR_ICMP_TYPES) @@ -327,7 +327,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net((*rt)->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -359,7 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); struct sock *sk; struct inet_sock *inet; __be32 daddr; @@ -427,7 +427,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; - net = dev_net(rt->u.dst.dev); + net = dev_net(rt->dst.dev); /* * Find the original header. It is expected to be valid, of course. @@ -596,9 +596,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, - RT_TOS(tos), rt2->u.dst.dev); + RT_TOS(tos), rt2->dst.dev); - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); skb_in->_skb_refdst = orefdst; /* restore old refdst */ } @@ -610,7 +610,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) XFRM_LOOKUP_ICMP); switch (err) { case 0: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = rt2; break; case -EPERM: @@ -629,7 +629,7 @@ route_done: /* RFC says return as much as we can without exceeding 576 bytes. */ - room = dst_mtu(&rt->u.dst); + room = dst_mtu(&rt->dst); if (room > 576) room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; @@ -972,7 +972,7 @@ int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3294f54..b5580d4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb->dev = dev; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = rt->rt_src; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->u.dst, NULL); + ip_select_ident(pip, &rt->dst, NULL); ((u8*)&pip[1])[0] = IPOPT_RA; ((u8*)&pip[1])[1] = 4; ((u8*)&pip[1])[2] = 0; @@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = rt->rt_src; iph->protocol = IPPROTO_IGMP; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); ((u8*)&iph[1])[0] = IPOPT_RA; ((u8*)&iph[1])[1] = 4; ((u8*)&iph[1])[2] = 0; @@ -1425,7 +1425,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) } if (!dev && !ip_route_output_key(net, &rt, &fl)) { - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); } if (dev) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 70eb350..57c9e4d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -383,7 +383,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; - return &rt->u.dst; + return &rt->dst; route_err: ip_rt_put(rt); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 56cdf68..99461f0 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; - if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && + if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { - IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(&rt->u.dst))); + htonl(dst_mtu(&rt->dst))); goto drop; } /* We are about to mangle packet. Copy it! */ - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) goto drop; iph = ip_hdr(skb); @@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, - rt->u.dst.dev, ip_forward_finish); + rt->dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 32618e1..749e548 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -745,7 +745,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -755,7 +755,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev df = tiph->frag_off; if (df) - mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; + mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -803,7 +803,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tunnel->err_count = 0; } - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { @@ -830,7 +830,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -853,7 +853,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else - iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); } ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; @@ -915,7 +915,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_GRE }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } @@ -1174,7 +1174,7 @@ static int ipgre_open(struct net_device *dev) struct rtable *rt; if (ip_route_output_key(dev_net(dev), &rt, &fl)) return -EADDRNOTAVAIL; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 08a3b12..db47a5a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -356,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9a4a6c9..6cbeb2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -151,15 +151,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; - if (ip_dont_fragment(sk, &rt->u.dst)) + if (ip_dont_fragment(sk, &rt->dst)) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->protocol = sk->sk_protocol; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); if (opt && opt->optlen) { iph->ihl += opt->optlen>>2; @@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb) { struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; /* * If the indicated interface is up and running, send the packet. @@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb) if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); packet_routed: if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) @@ -372,11 +372,11 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; iph->daddr = rt->rt_dst; @@ -387,7 +387,7 @@ packet_routed: ip_options_build(skb, opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(iph, &rt->u.dst, sk, + ip_select_ident_more(iph, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; @@ -452,7 +452,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) struct rtable *rt = skb_rtable(skb); int err = 0; - dev = rt->u.dst.dev; + dev = rt->dst.dev; /* * Point into the IP datagram header. @@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ hlen = iph->ihl * 4; - mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) mtu -= nf_bridge_mtu_reduction(skb); @@ -586,7 +586,7 @@ slow_path: * we need to make room for the encapsulating header */ pad = nf_bridge_pad(skb); - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, pad); mtu -= pad; /* @@ -833,13 +833,13 @@ int ip_append_data(struct sock *sk, */ *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : - dst_mtu(rt->u.dst.path); - inet->cork.dst = &rt->u.dst; + rt->dst.dev->mtu : + dst_mtu(rt->dst.path); + inet->cork.dst = &rt->dst; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - if ((exthdrlen = rt->u.dst.header_len) != 0) { + if ((exthdrlen = rt->dst.header_len) != 0) { length += exthdrlen; transhdrlen += exthdrlen; } @@ -852,7 +852,7 @@ int ip_append_data(struct sock *sk, exthdrlen = 0; mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; @@ -869,14 +869,14 @@ int ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; inet->cork.length += length; if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); @@ -924,7 +924,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -935,7 +935,7 @@ alloc_new_skb: * the last. */ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; if (transhdrlen) { skb = sock_alloc_send_skb(sk, @@ -1008,7 +1008,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1103,10 +1103,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) + if (!(rt->dst.dev->features&NETIF_F_SG)) return -EOPNOTSUPP; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); @@ -1122,7 +1122,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, inet->cork.length += size; if ((sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } @@ -1274,8 +1274,8 @@ int ip_push_pending_frames(struct sock *sk) * If local_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc >= IP_PMTUDISC_DO || - (skb->len <= dst_mtu(&rt->u.dst) && - ip_dont_fragment(sk, &rt->u.dst))) + (skb->len <= dst_mtu(&rt->dst) && + ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); if (inet->cork.flags & IPCORK_OPT) @@ -1284,7 +1284,7 @@ int ip_push_pending_frames(struct sock *sk) if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; else - ttl = ip_select_ttl(inet, &rt->u.dst); + ttl = ip_select_ttl(inet, &rt->dst); iph = (struct iphdr *)skb->data; iph->version = 4; @@ -1295,7 +1295,7 @@ int ip_push_pending_frames(struct sock *sk) } iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; @@ -1308,7 +1308,7 @@ int ip_push_pending_frames(struct sock *sk) * on dst refcount */ inet->cork.dst = NULL; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7fd6367..ec03673 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) df |= old_iph->frag_off & htons(IP_DF); if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPIP }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 856123f..8418afc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1551,9 +1551,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - dev = rt->u.dst.dev; + dev = rt->dst.dev; - if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { + if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not allow to send ICMP, so that packets will disappear to blackhole. @@ -1564,7 +1564,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; + encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); @@ -1575,7 +1575,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->bytes_out += skb->len; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_decrease_ttl(ip_hdr(skb)); /* FIXME: forward and output firewalls used to be called here. diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 07de855..cfbc79a 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* Drop old route. */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ @@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); + RT_TOS(iph->tos), rt->dst.dev) != 0) { + dst_release(&rt->dst); return -1; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); refdst_drop(orefdst); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 66cc3be..009a7b2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -325,24 +325,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, int err; struct rtable *rt = *rtp; - if (length > rt->u.dst.dev->mtu) { + if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, - rt->u.dst.dev->mtu); + rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); *rtp = NULL; skb_reset_network_header(skb); @@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -384,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb_transport_header(skb))->type); err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -606,7 +606,7 @@ out: return len; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 883b5c7..a291edb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -286,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { - if (dev_net(r->u.dst.dev) == seq_file_net(seq) && + if (dev_net(r->dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; - r = rcu_dereference_bh(r->u.dst.rt_next); + r = rcu_dereference_bh(r->dst.rt_next); } rcu_read_unlock_bh(); } @@ -301,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; - r = r->u.dst.rt_next; + r = r->dst.rt_next; while (!r) { rcu_read_unlock_bh(); do { @@ -319,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; while ((r = __rt_cache_get_next(seq, r)) != NULL) { - if (dev_net(r->u.dst.dev) != seq_file_net(seq)) + if (dev_net(r->dst.dev) != seq_file_net(seq)) continue; if (r->rt_genid == st->genid) break; @@ -377,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->u.dst.dev ? r->u.dst.dev->name : "*", + r->dst.dev ? r->dst.dev->name : "*", (__force u32)r->rt_dst, (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->u.dst.__refcnt), - r->u.dst.__use, 0, (__force u32)r->rt_src, - (dst_metric(&r->u.dst, RTAX_ADVMSS) ? - (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), - dst_metric(&r->u.dst, RTAX_WINDOW), - (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + - dst_metric(&r->u.dst, RTAX_RTTVAR)), + r->rt_flags, atomic_read(&r->dst.__refcnt), + r->dst.__use, 0, (__force u32)r->rt_src, + (dst_metric(&r->dst, RTAX_ADVMSS) ? + (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), + dst_metric(&r->dst, RTAX_WINDOW), + (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + + dst_metric(&r->dst, RTAX_RTTVAR)), r->fl.fl4_tos, - r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, - r->u.dst.hh ? (r->u.dst.hh->hh_output == + r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, + r->dst.hh ? (r->dst.hh->hh_output == dev_queue_xmit) : 0, r->rt_spec_dst, &len); @@ -608,13 +608,13 @@ static inline int ip_rt_proc_init(void) static inline void rt_free(struct rtable *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void rt_drop(struct rtable *rt) { ip_rt_put(rt); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline int rt_fast_clean(struct rtable *rth) @@ -622,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth) /* Kill broadcast/multicast entries very aggresively, if they collide in hash table with more useful entries */ return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rth->fl.iif && rth->u.dst.rt_next; + rth->fl.iif && rth->dst.rt_next; } static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->u.dst.expires; + rth->dst.expires; } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -636,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t unsigned long age; int ret = 0; - if (atomic_read(&rth->u.dst.__refcnt)) + if (atomic_read(&rth->dst.__refcnt)) goto out; ret = 1; - if (rth->u.dst.expires && - time_after_eq(jiffies, rth->u.dst.expires)) + if (rth->dst.expires && + time_after_eq(jiffies, rth->dst.expires)) goto out; - age = jiffies - rth->u.dst.lastuse; + age = jiffies - rth->dst.lastuse; ret = 0; if ((age <= tmo1 && !rt_fast_clean(rth)) || (age <= tmo2 && rt_valuable(rth))) @@ -660,7 +660,7 @@ out: return ret; */ static inline u32 rt_score(struct rtable *rt) { - u32 score = jiffies - rt->u.dst.lastuse; + u32 score = jiffies - rt->dst.lastuse; score = ~score & ~(3<<30); @@ -700,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) { - return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); + return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); } static inline int rt_is_expired(struct rtable *rth) { - return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); + return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } /* @@ -734,7 +734,7 @@ static void rt_do_flush(int process_context) rth = rt_hash_table[i].chain; /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; tail = tail->u.dst.rt_next) + for (tail = rth; tail; tail = tail->dst.rt_next) if (!rt_is_expired(tail)) break; if (rth != tail) @@ -743,9 +743,9 @@ static void rt_do_flush(int process_context) /* call rt_free on entries after the tail requiring flush */ prev = &rt_hash_table[i].chain; for (p = *prev; p; p = next) { - next = p->u.dst.rt_next; + next = p->dst.rt_next; if (!rt_is_expired(p)) { - prev = &p->u.dst.rt_next; + prev = &p->dst.rt_next; } else { *prev = next; rt_free(p); @@ -760,7 +760,7 @@ static void rt_do_flush(int process_context) spin_unlock_bh(rt_hash_lock_addr(i)); for (; rth != tail; rth = next) { - next = rth->u.dst.rt_next; + next = rth->dst.rt_next; rt_free(rth); } } @@ -791,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) while (aux != rth) { if (compare_hash_inputs(&aux->fl, &rth->fl)) return 0; - aux = aux->u.dst.rt_next; + aux = aux->dst.rt_next; } return ONE; } @@ -831,18 +831,18 @@ static void rt_check_expire(void) length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - prefetch(rth->u.dst.rt_next); + prefetch(rth->dst.rt_next); if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } - if (rth->u.dst.expires) { + if (rth->dst.expires) { /* Entry is expired even if it is in use */ - if (time_before_eq(jiffies, rth->u.dst.expires)) { + if (time_before_eq(jiffies, rth->dst.expires)) { nofree: tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; /* * We only count entries on * a chain with equal hash inputs once @@ -858,7 +858,7 @@ nofree: goto nofree; /* Cleanup aged off entries. */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); } spin_unlock_bh(rt_hash_lock_addr(i)); @@ -999,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops) if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; continue; } - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); goal--; } @@ -1068,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head) while (rth) { length += has_noalias(head, rth); - rth = rth->u.dst.rt_next; + rth = rth->dst.rt_next; } return length >> FRACT_BITS; } @@ -1090,7 +1090,7 @@ restart: candp = NULL; now = jiffies; - if (!rt_caching(dev_net(rt->u.dst.dev))) { + if (!rt_caching(dev_net(rt->dst.dev))) { /* * If we're not caching, just tell the caller we * were successful and don't touch the route. The @@ -1108,7 +1108,7 @@ restart: */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { if (net_ratelimit()) printk(KERN_WARNING @@ -1127,19 +1127,19 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { /* Put it first */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; /* * Since lookup is lockfree, the deletion * must be visible to another weakly ordered CPU before * the insertion at the start of the hash chain. */ - rcu_assign_pointer(rth->u.dst.rt_next, + rcu_assign_pointer(rth->dst.rt_next, rt_hash_table[hash].chain); /* * Since lookup is lockfree, the update writes @@ -1147,18 +1147,18 @@ restart: */ rcu_assign_pointer(rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); if (rp) *rp = rth; else - skb_dst_set(skb, &rth->u.dst); + skb_dst_set(skb, &rth->dst); return 0; } - if (!atomic_read(&rth->u.dst.__refcnt)) { + if (!atomic_read(&rth->dst.__refcnt)) { u32 score = rt_score(rth); if (score <= min_score) { @@ -1170,7 +1170,7 @@ restart: chain_length++; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; } if (cand) { @@ -1181,17 +1181,17 @@ restart: * only 2 entries per bucket. We will see. */ if (chain_length > ip_rt_gc_elasticity) { - *candp = cand->u.dst.rt_next; + *candp = cand->dst.rt_next; rt_free(cand); } } else { if (chain_length > rt_chain_length_max && slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(net)) { printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", - rt->u.dst.dev->name, num); + rt->dst.dev->name, num); } rt_emergency_hash_rebuild(net); spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1206,7 +1206,7 @@ restart: route or unicast forwarding path. */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1237,14 +1237,14 @@ restart: } } - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 - if (rt->u.dst.rt_next) { + if (rt->dst.rt_next) { struct rtable *trt; printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); - for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) + for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); } @@ -1262,7 +1262,7 @@ skip_hashing: if (rp) *rp = rt; else - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); return 0; } @@ -1334,11 +1334,11 @@ static void rt_del(unsigned hash, struct rtable *rt) ip_rt_put(rt); while ((aux = *rthp) != NULL) { if (aux == rt || rt_is_expired(aux)) { - *rthp = aux->u.dst.rt_next; + *rthp = aux->dst.rt_next; rt_free(aux); continue; } - rthp = &aux->u.dst.rt_next; + rthp = &aux->dst.rt_next; } spin_unlock_bh(rt_hash_lock_addr(hash)); } @@ -1392,19 +1392,19 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || rt_is_expired(rth) || - !net_eq(dev_net(rth->u.dst.dev), net)) { - rthp = &rth->u.dst.rt_next; + !net_eq(dev_net(rth->dst.dev), net)) { + rthp = &rth->dst.rt_next; continue; } if (rth->rt_dst != daddr || rth->rt_src != saddr || - rth->u.dst.error || + rth->dst.error || rth->rt_gateway != old_gw || - rth->u.dst.dev != dev) + rth->dst.dev != dev) break; - dst_hold(&rth->u.dst); + dst_hold(&rth->dst); rt = dst_alloc(&ipv4_dst_ops); if (rt == NULL) { @@ -1414,20 +1414,20 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, /* Copy all the information. */ *rt = *rth; - rt->u.dst.__use = 1; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.child = NULL; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.__use = 1; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.child = NULL; + if (rt->dst.dev) + dev_hold(rt->dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = -1; - rt->u.dst.lastuse = jiffies; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.neighbour = NULL; - rt->u.dst.hh = NULL; + rt->dst.obsolete = -1; + rt->dst.lastuse = jiffies; + rt->dst.path = &rt->dst; + rt->dst.neighbour = NULL; + rt->dst.hh = NULL; #ifdef CONFIG_XFRM - rt->u.dst.xfrm = NULL; + rt->dst.xfrm = NULL; #endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; @@ -1436,23 +1436,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway = new_gw; /* Redirect received -> path was valid */ - dst_confirm(&rth->u.dst); + dst_confirm(&rth->dst); if (rt->peer) atomic_inc(&rt->peer->refcnt); - if (arp_bind_neighbour(&rt->u.dst) || - !(rt->u.dst.neighbour->nud_state & + if (arp_bind_neighbour(&rt->dst) || + !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->u.dst.neighbour) - neigh_event_send(rt->u.dst.neighbour, NULL); + if (rt->dst.neighbour) + neigh_event_send(rt->dst.neighbour, NULL); ip_rt_put(rth); rt_drop(rt); goto do_next; } - netevent.old = &rth->u.dst; - netevent.new = &rt->u.dst; + netevent.old = &rth->dst; + netevent.new = &rt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); @@ -1488,8 +1488,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || - (rt->u.dst.expires && - time_after_eq(jiffies, rt->u.dst.expires))) { + (rt->dst.expires && + time_after_eq(jiffies, rt->dst.expires))) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); @@ -1527,7 +1527,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) int log_martians; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { rcu_read_unlock(); return; @@ -1538,30 +1538,30 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) - rt->u.dst.rate_tokens = 0; + if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) + rt->dst.rate_tokens = 0; /* Too many ignored redirects; do not send anything - * set u.dst.rate_last to the last seen redirected packet. + * set dst.rate_last to the last seen redirected packet. */ - if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { - rt->u.dst.rate_last = jiffies; + if (rt->dst.rate_tokens >= ip_rt_redirect_number) { + rt->dst.rate_last = jiffies; return; } /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (rt->u.dst.rate_tokens == 0 || + if (rt->dst.rate_tokens == 0 || time_after(jiffies, - (rt->u.dst.rate_last + - (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { + (rt->dst.rate_last + + (ip_rt_redirect_load << rt->dst.rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); - rt->u.dst.rate_last = jiffies; - ++rt->u.dst.rate_tokens; + rt->dst.rate_last = jiffies; + ++rt->dst.rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - rt->u.dst.rate_tokens == ip_rt_redirect_number && + rt->dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", &rt->rt_src, rt->rt_iif, @@ -1576,7 +1576,7 @@ static int ip_error(struct sk_buff *skb) unsigned long now; int code; - switch (rt->u.dst.error) { + switch (rt->dst.error) { case EINVAL: default: goto out; @@ -1585,7 +1585,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), + IP_INC_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INNOROUTES); break; case EACCES: @@ -1594,12 +1594,12 @@ static int ip_error(struct sk_buff *skb) } now = jiffies; - rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; - if (rt->u.dst.rate_tokens > ip_rt_error_burst) - rt->u.dst.rate_tokens = ip_rt_error_burst; - rt->u.dst.rate_last = now; - if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { - rt->u.dst.rate_tokens -= ip_rt_error_cost; + rt->dst.rate_tokens += now - rt->dst.rate_last; + if (rt->dst.rate_tokens > ip_rt_error_burst) + rt->dst.rate_tokens = ip_rt_error_burst; + rt->dst.rate_last = now; + if (rt->dst.rate_tokens >= ip_rt_error_cost) { + rt->dst.rate_tokens -= ip_rt_error_cost; icmp_send(skb, ICMP_DEST_UNREACH, code, 0); } @@ -1644,7 +1644,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth = rcu_dereference(rth->dst.rt_next)) { unsigned short mtu = new_mtu; if (rth->fl.fl4_dst != daddr || @@ -1653,8 +1653,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_src != iph->saddr || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - dst_metric_locked(&rth->u.dst, RTAX_MTU) || - !net_eq(dev_net(rth->u.dst.dev), net) || + dst_metric_locked(&rth->dst, RTAX_MTU) || + !net_eq(dev_net(rth->dst.dev), net) || rt_is_expired(rth)) continue; @@ -1662,22 +1662,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= dst_mtu(&rth->u.dst) && + old_mtu >= dst_mtu(&rth->dst) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= dst_mtu(&rth->u.dst)) { - if (mtu < dst_mtu(&rth->u.dst)) { - dst_confirm(&rth->u.dst); + if (mtu <= dst_mtu(&rth->dst)) { + if (mtu < dst_mtu(&rth->dst)) { + dst_confirm(&rth->dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; - rth->u.dst.metrics[RTAX_LOCK-1] |= + rth->dst.metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); } - rth->u.dst.metrics[RTAX_MTU-1] = mtu; - dst_set_expires(&rth->u.dst, + rth->dst.metrics[RTAX_MTU-1] = mtu; + dst_set_expires(&rth->dst, ip_rt_mtu_expires); } est_mtu = mtu; @@ -1750,7 +1750,7 @@ static void ipv4_link_failure(struct sk_buff *skb) rt = skb_rtable(skb); if (rt) - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct sk_buff *skb) @@ -1778,11 +1778,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; - else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { + else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { src = FIB_RES_PREFSRC(res); fib_res_put(&res); } else - src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, + src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); memcpy(addr, &src, 4); } @@ -1790,10 +1790,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) #ifdef CONFIG_NET_CLS_ROUTE static void set_class_tag(struct rtable *rt, u32 tag) { - if (!(rt->u.dst.tclassid & 0xFFFF)) - rt->u.dst.tclassid |= tag & 0xFFFF; - if (!(rt->u.dst.tclassid & 0xFFFF0000)) - rt->u.dst.tclassid |= tag & 0xFFFF0000; + if (!(rt->dst.tclassid & 0xFFFF)) + rt->dst.tclassid |= tag & 0xFFFF; + if (!(rt->dst.tclassid & 0xFFFF0000)) + rt->dst.tclassid |= tag & 0xFFFF0000; } #endif @@ -1805,30 +1805,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); if (fi->fib_mtu == 0) { - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + if (dst_metric_locked(&rt->dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && - rt->u.dst.dev->mtu > 576) - rt->u.dst.metrics[RTAX_MTU-1] = 576; + rt->dst.dev->mtu > 576) + rt->dst.metrics[RTAX_MTU-1] = 576; } #ifdef CONFIG_NET_CLS_ROUTE - rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; #endif } else - rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; - - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) - rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) - rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, + rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; + + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; + if (dst_mtu(&rt->dst) > IP_MAX_MTU) + rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; + if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) + rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, ip_rt_min_advmss); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) - rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; + if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) + rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; #ifdef CONFIG_NET_CLS_ROUTE #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1873,13 +1873,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!rth) goto e_nobufs; - rth->u.dst.output = ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output = ip_rt_bug; + rth->dst.obsolete = -1; - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1887,13 +1887,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = init_net.loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = init_net.loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; @@ -1901,13 +1901,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { - rth->u.dst.input= ip_local_deliver; + rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; } #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) - rth->u.dst.input = ip_mr_input; + rth->dst.input = ip_mr_input; #endif RT_CACHE_STAT_INC(in_slow_mc); @@ -2016,12 +2016,12 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2031,16 +2031,16 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_iif = rth->fl.iif = in_dev->dev->ifindex; - rth->u.dst.dev = (out_dev)->dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = (out_dev)->dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; - rth->u.dst.obsolete = -1; - rth->u.dst.input = ip_forward; - rth->u.dst.output = ip_output; - rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); + rth->dst.obsolete = -1; + rth->dst.input = ip_forward; + rth->dst.output = ip_output; + rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rt_set_nexthop(rth, res, itag); @@ -2074,7 +2074,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, - rt_genid(dev_net(rth->u.dst.dev))); + rt_genid(dev_net(rth->dst.dev))); return rt_intern_hash(hash, rth, NULL, skb, fl->iif); } @@ -2197,14 +2197,14 @@ local_input: if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output= ip_rt_bug; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(net); - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2212,20 +2212,20 @@ local_input: rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = net->loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = net->loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->u.dst.input= ip_local_deliver; + rth->dst.input= ip_local_deliver; rth->rt_flags = flags|RTCF_LOCAL; if (res.type == RTN_UNREACHABLE) { - rth->u.dst.input= ip_error; - rth->u.dst.error= -err; + rth->dst.input= ip_error; + rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; @@ -2291,21 +2291,21 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, iif, rt_genid(net)); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | rth->fl.oif | (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { if (noref) { - dst_use_noref(&rth->u.dst, jiffies); - skb_dst_set_noref(skb, &rth->u.dst); + dst_use_noref(&rth->dst, jiffies); + skb_dst_set_noref(skb, &rth->dst); } else { - dst_use(&rth->u.dst, jiffies); - skb_dst_set(skb, &rth->u.dst); + dst_use(&rth->dst, jiffies); + skb_dst_set(skb, &rth->dst); } RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2412,12 +2412,12 @@ static int __mkroute_output(struct rtable **result, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; @@ -2429,35 +2429,35 @@ static int __mkroute_output(struct rtable **result, rth->rt_iif = oldflp->oif ? : dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ - rth->u.dst.dev = dev_out; + rth->dst.dev = dev_out; dev_hold(dev_out); rth->idev = in_dev_get(dev_out); rth->rt_gateway = fl->fl4_dst; rth->rt_spec_dst= fl->fl4_src; - rth->u.dst.output=ip_output; - rth->u.dst.obsolete = -1; + rth->dst.output=ip_output; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); if (flags & RTCF_LOCAL) { - rth->u.dst.input = ip_local_deliver; + rth->dst.input = ip_local_deliver; rth->rt_spec_dst = fl->fl4_dst; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { rth->rt_spec_dst = fl->fl4_src; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { - rth->u.dst.output = ip_mc_output; + rth->dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE if (res->type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(oldflp->fl4_dst)) { - rth->u.dst.input = ip_mr_input; - rth->u.dst.output = ip_mc_output; + rth->dst.input = ip_mr_input; + rth->dst.output = ip_mc_output; } } #endif @@ -2712,7 +2712,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; - rth = rcu_dereference_bh(rth->u.dst.rt_next)) { + rth = rcu_dereference_bh(rth->dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -2720,9 +2720,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - dst_use(&rth->u.dst, jiffies); + dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); *rp = rth; @@ -2759,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_alloc(&ipv4_dst_blackhole_ops); if (rt) { - struct dst_entry *new = &rt->u.dst; + struct dst_entry *new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -2791,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_free(new); } - dst_release(&(*rp)->u.dst); + dst_release(&(*rp)->dst); *rp = rt; return (rt ? 0 : -ENOMEM); } @@ -2861,11 +2861,11 @@ static int rt_fill_info(struct net *net, r->rtm_src_len = 32; NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } - if (rt->u.dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); + if (rt->dst.dev) + NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE - if (rt->u.dst.tclassid) - NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); + if (rt->dst.tclassid) + NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); #endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); @@ -2875,11 +2875,11 @@ static int rt_fill_info(struct net *net, if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - error = rt->u.dst.error; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; + error = rt->dst.error; + expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; if (rt->peer) { id = atomic_read(&rt->peer->ip_id_count) & 0xffff; if (rt->peer->tcp_ts_stamp) { @@ -2911,7 +2911,7 @@ static int rt_fill_info(struct net *net, NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, expires, error) < 0) goto nla_put_failure; @@ -2976,8 +2976,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void local_bh_enable(); rt = skb_rtable(skb); - if (err == 0 && rt->u.dst.error) - err = -rt->u.dst.error; + if (err == 0 && rt->dst.error) + err = -rt->dst.error; } else { struct flowi fl = { .nl_u = { @@ -2995,7 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -3031,12 +3031,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; rcu_read_lock_bh(); for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { - if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) + rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { + if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) continue; - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 5c48124..02bef6a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -354,15 +354,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } /* Try to redo what tcp_v4_send_synack did. */ - req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); + req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, &rcv_wscale, - dst_metric(&rt->u.dst, RTAX_INITRWND)); + dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ret = get_cookie_sock(sk, skb, req, &rt->u.dst); + ret = get_cookie_sock(sk, skb, req, &rt->dst); out: return ret; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7f976af..7f9515c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -237,7 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eec4ff4..32e0bef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, !sock_flag(sk, SOCK_BROADCAST)) goto out; if (connected) - sk_dst_set(sk, dst_clone(&rt->u.dst)); + sk_dst_set(sk, dst_clone(&rt->dst)); } if (msg->msg_flags&MSG_CONFIRM) @@ -978,7 +978,7 @@ out: return err; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1705476..3493270 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, fl.fl4_src = saddr->a4; err = __ip_route_output_key(net, &rt, &fl); - dst = &rt->u.dst; + dst = &rt->dst; if (err) dst = ERR_PTR(err); return dst; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1a698d..b97bb1f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -557,7 +557,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warning("Freeing alive inet6 address %p\n", ifp); return; } - dst_release(&ifp->rt->u.dst); + dst_release(&ifp->rt->dst); call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); } @@ -823,7 +823,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) rt->rt6i_flags |= RTF_EXPIRES; } } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } out: @@ -1863,7 +1863,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) dev, expires, flags); } if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* Try to figure out our local address for this prefix */ @@ -4093,11 +4093,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); - dst_hold(&ifp->rt->u.dst); + dst_hold(&ifp->rt->dst); if (ifp->state == INET6_IFADDR_STATE_DEAD && ip6_del_rt(ifp->rt)) - dst_free(&ifp->rt->u.dst); + dst_free(&ifp->rt->dst); break; } } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f058fbd..0e5e943 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dst_release(&rt->u.dst); + dst_release(&rt->dst); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; @@ -244,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac) { if (atomic_dec_and_test(&ac->aca_refcnt)) { in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->u.dst); + dst_release(&ac->aca_rt->dst); kfree(ac); } } @@ -350,7 +350,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->u.dst); + dst_hold(&aca->aca_rt->dst); ip6_del_rt(aca->aca_rt); aca_put(aca); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8e44f8f..b1108ed 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, if (arg.result) return arg.result; - dst_hold(&net->ipv6.ip6_null_entry->u.dst); - return &net->ipv6.ip6_null_entry->u.dst; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, struct in6_addr saddr; if (ipv6_dev_get_saddr(net, - ip6_dst_idev(&rt->u.dst)->dev, + ip6_dst_idev(&rt->dst)->dev, &flp->fl6_dst, rt6_flags2srcprefs(flags), &saddr)) @@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } again: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = NULL; goto out; discard_pkt: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); out: arg->result = rt; return rt == NULL ? -EAGAIN : 0; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 92a122b..b6a5859 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn) static __inline__ void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) - dst_free(&rt->u.dst); + dst_free(&rt->dst); } static void fib6_link_table(struct net *net, struct fib6_table *tb) @@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) int res; struct rt6_info *rt; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = rt6_dump_route(rt, w->args); if (res < 0) { /* Frame is full, suspend walking */ @@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; - for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) { + for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) { /* * Search for duplicates */ @@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (iter->rt6i_metric > rt->rt6i_metric) break; - ins = &iter->u.dst.rt6_next; + ins = &iter->dst.rt6_next; } /* Reset round-robin state, if necessary */ @@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * insert node */ - rt->u.dst.rt6_next = iter; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); @@ -799,7 +799,7 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - dst_free(&rt->u.dst); + dst_free(&rt->dst); } return err; @@ -810,7 +810,7 @@ out: st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; #endif } @@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, RT6_TRACE("fib6_del_route\n"); /* Unlink it */ - *rtp = rt->u.dst.rt6_next; + *rtp = rt->dst.rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, FOR_WALKERS(w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rt->u.dst.rt6_next; + w->leaf = rt->dst.rt6_next; if (w->leaf == NULL) w->state = FWS_U; } } read_unlock(&fib6_walker_lock); - rt->u.dst.rt6_next = NULL; + rt->dst.rt6_next = NULL; /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { @@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->u.dst.obsolete>0) { + if (rt->dst.obsolete>0) { WARN_ON(fn != NULL); return -ENOENT; } @@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * Walk the leaf entries looking for ourself */ - for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) { + for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { if (*rtp == rt) { fib6_del_route(fn, rtp, info); return 0; @@ -1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) .nl_net = c->net, }; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; @@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) } gc_args.more++; } else if (rt->rt6i_flags & RTF_CACHE) { - if (atomic_read(&rt->u.dst.__refcnt) == 0 && - time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) { + if (atomic_read(&rt->dst.__refcnt) == 0 && + time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 89425af..d40b330 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); for (;;) { /* Prepare header of the next frame, @@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) @@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return 0; } @@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return err; } @@ -785,7 +785,7 @@ slow_path: * Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -798,7 +798,7 @@ slow_path: */ ip6_copy_metadata(frag, skb); - skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); @@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } - dst_hold(&rt->u.dst); - inet->cork.dst = &rt->u.dst; + dst_hold(&rt->dst); + inet->cork.dst = &rt->dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } inet->cork.fragsize = mtu; - if (dst_allfrag(rt->u.dst.path)) + if (dst_allfrag(rt->dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - + exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; @@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); @@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } if (proto == IPPROTO_UDP && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, @@ -1270,7 +1270,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -1281,7 +1281,7 @@ alloc_new_skb: * because we have no idea if we're the last one. */ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; /* * We just reserve space for fragment header. @@ -1358,7 +1358,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8f39893..0fd027f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) goto out; - skb2->dev = rt->u.dst.dev; + skb2->dev = rt->dst.dev; /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { @@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { + rt->dst.dev->type != ARPHRD_TUNNEL) { ip_rt_put(rt); goto out; } @@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_send(skb2, rel_type, rel_code, rel_info); if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb2); } @@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e36d15..d1444b9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -158,7 +158,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else dev = dev_get_by_index_rcu(net, ifindex); @@ -248,7 +248,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->rt6i_dev; dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else dev = dev_get_by_index_rcu(net, ifindex); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0abdc24..1fc46fc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", __func__); - dst_release(&rt->u.dst); + dst_release(&rt->dst); in6_dev_put(in6_dev); return; } @@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; } skip_defrtr: @@ -1363,7 +1363,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->u.dst.metrics[RTAX_MTU-1] = mtu; + rt->dst.metrics[RTAX_MTU-1] = mtu; rt6_mtu_change(skb->dev, mtu); } @@ -1384,7 +1384,7 @@ skip_linkparms: } out: if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); else if (neigh) neigh_release(neigh); in6_dev_put(in6_dev); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 968b964..e677937 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -611,23 +611,23 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, int err; struct rt6_info *rt = (struct rt6_info *)*dstp; - if (length > rt->u.dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); + if (length > rt->dst.dev->mtu) { + ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); *dstp = NULL; skb_put(skb, length); @@ -643,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 252d761..f770285 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = { }; static struct rt6_info ip6_null_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_discard, - .output = ip6_pkt_discard_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -ENETUNREACH, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); static struct rt6_info ip6_prohibit_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_prohibit, - .output = ip6_pkt_prohibit_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_prohibit, + .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = { }; static struct rt6_info ip6_blk_hole_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = dst_discard, - .output = dst_discard, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = dst_discard, + .output = dst_discard, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (!oif && ipv6_addr_any(saddr)) goto out; - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { struct net_device *dev = sprt->rt6i_dev; if (oif) { @@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); return match; @@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) if (!match && (strict & RT6_LOOKUP_F_REACHABLE)) { - struct rt6_info *next = rt0->u.dst.rt6_next; + struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) @@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt->rt6i_expires = jiffies + HZ * lifetime; rt->rt6i_flags |= RTF_EXPIRES; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } return 0; } @@ -555,7 +549,7 @@ restart: rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); return rt; @@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING "Neighbour table overflow.\n"); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return NULL; } rt->rt6i_nexthop = neigh; @@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); } return rt; @@ -726,7 +720,7 @@ restart: rt->rt6i_flags & RTF_CACHE) goto out; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) @@ -739,10 +733,10 @@ restart: #endif } - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); if (nrt) { err = ip6_ins_rt(nrt); if (!err) @@ -756,7 +750,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto relookup; out: @@ -764,11 +758,11 @@ out: reachable = 0; goto restart_2; } - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; + rt->dst.lastuse = jiffies; + rt->dst.__use++; return rt; } @@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl struct dst_entry *new = NULL; if (rt) { - new = &rt->u.dst; + new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); rt->rt6i_idev = ort->rt6i_idev; @@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags&RTF_CACHE) { - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); rt->rt6i_flags |= RTF_EXPIRES; } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) rt->rt6i_node->fn_sernum = -1; @@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_dev = dev; rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.output = ip6_output; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ - rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST + rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST ? DST_HOST : 0; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); @@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, #endif spin_lock_bh(&icmp6_dst_lock); - rt->u.dst.next = icmp6_dst_gc_list; - icmp6_dst_gc_list = &rt->u.dst; + rt->dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->dst; spin_unlock_bh(&icmp6_dst_lock); fib6_force_start_gc(net); out: - return &rt->u.dst; + return &rt->dst; } int icmp6_dst_gc(void) @@ -1159,7 +1153,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->u.dst.obsolete = -1; + rt->dst.obsolete = -1; rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? jiffies + clock_t_to_jiffies(cfg->fc_expires) : 0; @@ -1171,16 +1165,16 @@ int ip6_route_add(struct fib6_config *cfg) addr_type = ipv6_addr_type(&cfg->fc_dst); if (addr_type & IPV6_ADDR_MULTICAST) - rt->u.dst.input = ip6_mc_input; + rt->dst.input = ip6_mc_input; else - rt->u.dst.input = ip6_forward; + rt->dst.input = ip6_forward; - rt->u.dst.output = ip6_output; + rt->dst.output = ip6_output; ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->u.dst.flags = DST_HOST; + rt->dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1208,9 +1202,9 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->u.dst.output = ip6_pkt_discard_out; - rt->u.dst.input = ip6_pkt_discard; - rt->u.dst.error = -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + rt->dst.error = -ENETUNREACH; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; goto install_route; } @@ -1244,7 +1238,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->rt6i_dev) { - dst_release(&grt->u.dst); + dst_release(&grt->dst); goto out; } } else { @@ -1255,7 +1249,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags&RTF_GATEWAY)) err = 0; - dst_release(&grt->u.dst); + dst_release(&grt->dst); if (err) goto out; @@ -1294,18 +1288,18 @@ install_route: goto out; } - rt->u.dst.metrics[type - 1] = nla_get_u32(nla); + rt->dst.metrics[type - 1] = nla_get_u32(nla); } } } - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!dst_mtu(&rt->u.dst)) - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.dev = dev; + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + if (!dst_mtu(&rt->dst)) + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); + if (!dst_metric(&rt->dst, RTAX_ADVMSS)) + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1319,7 +1313,7 @@ out: if (idev) in6_dev_put(idev); if (rt) - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; } @@ -1336,7 +1330,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); - dst_release(&rt->u.dst); + dst_release(&rt->dst); write_unlock_bh(&table->tb6_lock); @@ -1369,7 +1363,7 @@ static int ip6_route_del(struct fib6_config *cfg) &cfg->fc_src, cfg->fc_src_len); if (fn) { - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (cfg->fc_ifindex && (rt->rt6i_dev == NULL || rt->rt6i_dev->ifindex != cfg->fc_ifindex)) @@ -1379,7 +1373,7 @@ static int ip6_route_del(struct fib6_config *cfg) continue; if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); return __ip6_del_rt(rt, &cfg->fc_nlinfo); @@ -1421,7 +1415,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { /* * Current route is on-link; redirect is always invalid. * @@ -1445,7 +1439,7 @@ restart: rt = net->ipv6.ip6_null_entry; BACKTRACK(net, &fl->fl6_src); out: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); @@ -1513,10 +1507,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->u.dst.neighbour) + if (neigh == rt->dst.neighbour) goto out; nrt = ip6_rt_copy(rt); @@ -1529,20 +1523,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); nrt->rt6i_dst.plen = 128; - nrt->u.dst.flags |= DST_HOST; + nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ - nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->u.dst)); + nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); + nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->dst)); if (ip6_ins_rt(nrt)) goto out; - netevent.old = &rt->u.dst; - netevent.new = &nrt->u.dst; + netevent.old = &rt->dst; + netevent.new = &nrt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags&RTF_CACHE) { @@ -1551,7 +1545,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1570,7 +1564,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, if (rt == NULL) return; - if (pmtu >= dst_mtu(&rt->u.dst)) + if (pmtu >= dst_mtu(&rt->dst)) goto out; if (pmtu < IPV6_MIN_MTU) { @@ -1588,7 +1582,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, They are sent only in response to data packets, so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Host route. If it is static, it would be better not to override it, but add new one, so that @@ -1596,10 +1590,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. */ if (rt->rt6i_flags & RTF_CACHE) { - rt->u.dst.metrics[RTAX_MTU-1] = pmtu; + rt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1615,9 +1609,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + nrt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. @@ -1625,13 +1619,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1644,18 +1638,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt) { - rt->u.dst.input = ort->u.dst.input; - rt->u.dst.output = ort->u.dst.output; - - memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - rt->u.dst.error = ort->u.dst.error; - rt->u.dst.dev = ort->u.dst.dev; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.input = ort->dst.input; + rt->dst.output = ort->dst.output; + + memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + rt->dst.error = ort->dst.error; + rt->dst.dev = ort->dst.dev; + if (rt->dst.dev) + dev_hold(rt->dst.dev); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); - rt->u.dst.lastuse = jiffies; + rt->dst.lastuse = jiffies; rt->rt6i_expires = 0; ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); @@ -1689,14 +1683,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!fn) goto out; - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); break; } out: @@ -1744,14 +1738,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return NULL; write_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) break; } if (rt) - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); write_unlock_bh(&table->tb6_lock); return rt; } @@ -1790,9 +1784,9 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); goto restart; @@ -1930,15 +1924,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, dev_hold(net->loopback_dev); in6_dev_hold(idev); - rt->u.dst.flags = DST_HOST; - rt->u.dst.input = ip6_input; - rt->u.dst.output = ip6_output; + rt->dst.flags = DST_HOST; + rt->dst.input = ip6_input; + rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - rt->u.dst.obsolete = -1; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) @@ -1947,7 +1941,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_LOCAL; neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); if (IS_ERR(neigh)) { - dst_free(&rt->u.dst); + dst_free(&rt->dst); /* We are casting this because that is the return * value type. But an errno encoded pointer is the @@ -1962,7 +1956,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); - atomic_set(&rt->u.dst.__refcnt, 1); + atomic_set(&rt->dst.__refcnt, 1); return rt; } @@ -2033,12 +2027,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->rt6i_dev == arg->dev && - !dst_metric_locked(&rt->u.dst, RTAX_MTU) && - (dst_mtu(&rt->u.dst) >= arg->mtu || - (dst_mtu(&rt->u.dst) < arg->mtu && - dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { - rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU) && + (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6))) { + rt->dst.metrics[RTAX_MTU-1] = arg->mtu; + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -2252,20 +2246,20 @@ static int rt6_fill_node(struct net *net, #endif NLA_PUT_U32(skb, RTA_IIF, iif); } else if (dst) { - struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - if (rt->u.dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + if (rt->dst.neighbour) + NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); - if (rt->u.dst.dev) + if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); @@ -2277,8 +2271,8 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, - expires, rt->u.dst.error) < 0) + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, + expires, rt->dst.error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2364,7 +2358,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, @@ -2416,12 +2410,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { - net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->dst.dev = dev; net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); - net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif } @@ -2464,8 +2458,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000"); } seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, rt->rt6i_flags, + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, rt->rt6i_dev ? rt->rt6i_dev->name : ""); return 0; } @@ -2646,9 +2640,9 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_null_entry) goto out_ip6_dst_ops; - net->ipv6.ip6_null_entry->u.dst.path = + net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; - net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2656,18 +2650,18 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_prohibit_entry) goto out_ip6_null_entry; - net->ipv6.ip6_prohibit_entry->u.dst.path = + net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; - net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); if (!net->ipv6.ip6_blk_hole_entry) goto out_ip6_prohibit_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.path = + net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; #endif net->ipv6.sysctl.flush_delay = 0; @@ -2742,12 +2736,12 @@ int __init ip6_route_init(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif ret = fib6_init(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 702c532..4699cd3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -712,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, stats->tx_carrier_errors++; goto tx_error_icmp; } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -721,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -780,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags = 0; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -829,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPV6 }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0852512..226a0ae 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); write_lock_bh(&l2tp_ip_lock); hlist_del_init(&sk->sk_bind_node); @@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); /* Queue the packet to IP for output */ rc = ip_queue_xmit(skb); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 93c15a1..02b078e 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -90,10 +90,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) &dest->addr.ip); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", &dest->addr.ip, - atomic_read(&rt->u.dst.__refcnt), rtos); + atomic_read(&rt->dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); } else { @@ -148,10 +148,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) &dest->addr.in6); return NULL; } - __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", &dest->addr.in6, - atomic_read(&rt->u.dst.__refcnt)); + atomic_read(&rt->dst.__refcnt)); } spin_unlock(&dest->dst_lock); } else { @@ -198,7 +198,7 @@ do { \ (skb)->ipvs_property = 1; \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - (rt)->u.dst.dev, dst_output); \ + (rt)->dst.dev, dst_output); \ } while (0) @@ -245,7 +245,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -265,7 +265,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -309,9 +309,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -323,13 +323,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -376,7 +376,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -388,12 +388,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -452,9 +452,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); @@ -465,12 +465,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -498,7 +498,7 @@ tx_error: kfree_skb(skb); return NF_STOLEN; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif @@ -549,9 +549,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { ip_rt_put(rt); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); @@ -601,7 +601,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -615,7 +615,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->ttl = old_iph->ttl; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -660,12 +660,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!rt) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); /* TODO IPv6: do we need this check in IPv6? */ if (mtu < 1280) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); goto tx_error; } @@ -674,7 +674,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -689,7 +689,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; @@ -707,7 +707,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -760,7 +760,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -780,7 +780,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -813,10 +813,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -827,13 +827,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -888,7 +888,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -900,12 +900,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp(skb, pp, cp, 0); @@ -963,9 +963,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -975,12 +975,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp_v6(skb, pp, cp, 0); @@ -1001,7 +1001,7 @@ out: LeaveFunction(10); return rc; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 6eaee7c..b969025 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (rt1->rt_gateway == rt2->rt_gateway && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } @@ -753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 497b222..aadde01 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, goto out; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 62ec021..1841388 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_unlock(); if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); } return mtu; } diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 859d9fd..c77a85b 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -77,8 +77,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); - skb->dev = rt->u.dst.dev; + skb_dst_set(skb, &rt->dst); + skb->dev = rt->dst.dev; skb->protocol = htons(ETH_P_IP); return true; } diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index f0f85b0..9f1729b 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) return; } - peer->if_mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + peer->if_mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); _leave(" [if_mtu %u]", peer->if_mtu); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1827498..a0e1a7f 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, __func__, &fl.fl4_dst, &fl.fl4_src); if (!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; } /* If there is no association or if a source address is passed, no @@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, fl.fl4_src = laddr->a.v4.sin_addr.s_addr; fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; goto out_unlock; } } -- cgit v1.1 From c7de2cf053420d63bac85133469c965d4b1083e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 02:09:23 +0000 Subject: pkt_sched: gen_kill_estimator() rcu fixes gen_kill_estimator() API is incomplete or not well documented, since caller should make sure an RCU grace period is respected before freeing stats_lock. This was partially addressed in commit 5d944c640b4 (gen_estimator: deadlock fix), but same problem exist for all gen_kill_estimator() users, if lock they use is not already RCU protected. A code review shows xt_RATEEST.c, act_api.c, act_police.c have this problem. Other are ok because they use qdisc lock, already RCU protected. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 1 + net/netfilter/xt_RATEEST.c | 12 +++++++++++- net/sched/act_api.c | 11 ++++++++++- net/sched/act_police.c | 12 +++++++++++- 4 files changed, 33 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 785e527..9fbe7f7 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. * + * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 69c01e1..de079abd 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name) } EXPORT_SYMBOL_GPL(xt_rateest_lookup); +static void xt_rateest_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct xt_rateest, rcu)); +} + void xt_rateest_put(struct xt_rateest *est) { mutex_lock(&xt_rateest_mutex); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->bstats, &est->rstats); - kfree(est); + /* + * gen_estimator est_timer() might access est->lock or bstats, + * wait a RCU grace period before freeing 'est' + */ + call_rcu(&est->rcu, xt_rateest_free_rcu); } mutex_unlock(&xt_rateest_mutex); } @@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void) static void __exit xt_rateest_tg_fini(void) { xt_unregister_target(&xt_rateest_tg_reg); + rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */ } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 972378f..23b25f8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -26,6 +26,11 @@ #include #include +static void tcf_common_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_common, tcfc_rcu)); +} + void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) { unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); @@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_unlock_bh(hinfo->lock); gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); - kfree(p); + /* + * gen_estimator est_timer() might access p->tcfc_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); return; } } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 654f73d..537a487 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -97,6 +97,11 @@ nla_put_failure: goto done; } +static void tcf_police_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_police, tcf_rcu)); +} + static void tcf_police_destroy(struct tcf_police *p) { unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); @@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) qdisc_put_rtab(p->tcfp_P_tab); - kfree(p); + /* + * gen_estimator est_timer() might access p->tcf_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcf_rcu, tcf_police_free_rcu); return; } } @@ -397,6 +406,7 @@ static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops); + rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */ } module_init(police_init_module); -- cgit v1.1 From 0c78a92fbd655ab990e2799f645707f05f548e2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jun 2010 16:33:05 +0000 Subject: econet: fix locking econet lacks proper locking. It holds econet_lock only when inserting or deleting an entry in econet_sklist, not during lookups. - convert econet_lock from rwlock to spinlock - use econet_lock in ec_listening_socket() lookup - use appropriate sock_hold() / sock_put() to avoid corruptions. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/econet/af_econet.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 2a5a805..dc54bd0 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -48,7 +48,7 @@ static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; -static DEFINE_RWLOCK(econet_lock); +static DEFINE_SPINLOCK(econet_lock); static DEFINE_MUTEX(econet_mutex); /* Since there are only 256 possible network numbers (or fewer, depends @@ -98,16 +98,16 @@ struct ec_cb static void econet_remove_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_del_node_init(sk); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } static void econet_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_add_node(sk, list); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } /* @@ -782,15 +782,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char struct sock *sk; struct hlist_node *node; + spin_lock(&econet_lock); sk_for_each(sk, node, &econet_sklist) { struct econet_sock *opt = ec_sk(sk); if ((opt->port == port || opt->port == 0) && (opt->station == station || opt->station == 0) && - (opt->net == net || opt->net == 0)) + (opt->net == net || opt->net == 0)) { + sock_hold(sk); goto found; + } } sk = NULL; found: + spin_unlock(&econet_lock); return sk; } @@ -852,7 +856,7 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) { struct iphdr *ip = ip_hdr(skb); unsigned char stn = ntohl(ip->saddr) & 0xff; - struct sock *sk; + struct sock *sk = NULL; struct sk_buff *newskb; struct ec_device *edev = skb->dev->ec_ptr; @@ -882,10 +886,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) } aun_send_response(ip->saddr, ah->handle, 3, 0); + sock_put(sk); return; bad: aun_send_response(ip->saddr, ah->handle, 4, 0); + if (sk) + sock_put(sk); } /* @@ -1050,7 +1057,7 @@ release: static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; - struct sock *sk; + struct sock *sk = NULL; struct ec_device *edev = dev->ec_ptr; if (!net_eq(dev_net(dev), &init_net)) @@ -1085,10 +1092,12 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb, hdr->port)) goto drop; - + sock_put(sk); return NET_RX_SUCCESS; drop: + if (sk) + sock_put(sk); kfree_skb(skb); return NET_RX_DROP; } -- cgit v1.1 From 43d28b6515a6ea580a198df3e253e88236f08978 Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Wed, 9 Jun 2010 22:49:57 +0000 Subject: pktgen: increasing transmission granularity This patch increases the granularity of the rate generated by pktgen. The previous version of pktgen uses micro seconds (udelay) resolution when it was delayed causing gaps in the rates. It is changed to nanosecond (ndelay). Now any rate is possible. Also it allows to set, the desired rate in Mb/s or packets per second. The documentation has been updated. Signed-off-by: Daniel Turull Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/core/pktgen.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1dacd7b..6428653 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -169,7 +169,7 @@ #include #include /* do_div */ -#define VERSION "2.73" +#define VERSION "2.74" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) @@ -980,6 +980,40 @@ static ssize_t pktgen_if_write(struct file *file, (unsigned long long) pkt_dev->delay); return count; } + if (!strcmp(name, "rate")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value; + if (debug) + printk(KERN_INFO + "pktgen: Delay set at: %llu ns\n", + pkt_dev->delay); + + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } + if (!strcmp(name, "ratep")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = NSEC_PER_SEC/value; + if (debug) + printk(KERN_INFO + "pktgen: Delay set at: %llu ns\n", + pkt_dev->delay); + + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } if (!strcmp(name, "udp_src_min")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) @@ -2142,15 +2176,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_set_expires(&t.timer, spin_until); - remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); + remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); if (remaining <= 0) { pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); return; } start_time = ktime_now(); - if (remaining < 100) - udelay(remaining); /* really small just spin */ + if (remaining < 100000) + ndelay(remaining); /* really small just spin */ else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); -- cgit v1.1 From be1f3c2c027cc5ad735df6a45a542ed1db7ec48b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 8 Jun 2010 07:19:54 +0000 Subject: net: Enable 64-bit net device statistics on 32-bit architectures Use struct rtnl_link_stats64 as the statistics structure. On 32-bit architectures, insert 32 bits of padding after/before each field of struct net_device_stats to make its layout compatible with struct rtnl_link_stats64. Add an anonymous union in net_device; move stats into the union and add struct rtnl_link_stats64 stats64. Add net_device_ops::ndo_get_stats64, implementations of which will return a pointer to struct rtnl_link_stats64. Drivers that implement this operation must not update the structure asynchronously. Change dev_get_stats() to call ndo_get_stats64 if available, and to return a pointer to struct rtnl_link_stats64. Change callers of dev_get_stats() accordingly. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/8021q/vlanproc.c | 13 +++++++------ net/core/dev.c | 19 +++++++++++-------- net/core/net-sysfs.c | 12 ++++++------ net/core/rtnetlink.c | 6 +++--- 4 files changed, 27 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index afead35..df56f5c 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -278,8 +278,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) { struct net_device *vlandev = (struct net_device *) seq->private; const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); - const struct net_device_stats *stats; + const struct rtnl_link_stats64 *stats; static const char fmt[] = "%30s %12lu\n"; + static const char fmt64[] = "%30s %12llu\n"; int i; if (!is_vlan_dev(vlandev)) @@ -291,12 +292,12 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) vlandev->name, dev_info->vlan_id, (int)(dev_info->flags & 1), vlandev->priv_flags); - seq_printf(seq, fmt, "total frames received", stats->rx_packets); - seq_printf(seq, fmt, "total bytes received", stats->rx_bytes); - seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast); + seq_printf(seq, fmt64, "total frames received", stats->rx_packets); + seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes); + seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast); seq_puts(seq, "\n"); - seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets); - seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes); + seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets); + seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); seq_printf(seq, fmt, "total headroom inc", dev_info->cnt_inc_headroom_on_tx); seq_printf(seq, fmt, "total encap on xmit", diff --git a/net/core/dev.c b/net/core/dev.c index 2778449..a1abc10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3701,10 +3701,10 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - const struct net_device_stats *stats = dev_get_stats(dev); + const struct rtnl_link_stats64 *stats = dev_get_stats(dev); - seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " - "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, @@ -5281,18 +5281,21 @@ EXPORT_SYMBOL(dev_txq_stats_fold); * @dev: device to get statistics from * * Get network statistics from device. The device driver may provide - * its own method by setting dev->netdev_ops->get_stats; otherwise - * the internal statistics structure is used. + * its own method by setting dev->netdev_ops->get_stats64 or + * dev->netdev_ops->get_stats; otherwise the internal statistics + * structure is used. */ -const struct net_device_stats *dev_get_stats(struct net_device *dev) +const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_get_stats64) + return ops->ndo_get_stats64(dev); if (ops->ndo_get_stats) - return ops->ndo_get_stats(dev); + return (struct rtnl_link_stats64 *)ops->ndo_get_stats(dev); dev_txq_stats_fold(dev, &dev->stats); - return &dev->stats; + return &dev->stats64; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 99e7052..ea3bb4c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -29,6 +29,7 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; +static const char fmt_u64[] = "%llu\n"; static inline int dev_isalive(const struct net_device *dev) { @@ -324,14 +325,13 @@ static ssize_t netstat_show(const struct device *d, struct net_device *dev = to_net_dev(d); ssize_t ret = -EINVAL; - WARN_ON(offset > sizeof(struct net_device_stats) || - offset % sizeof(unsigned long) != 0); + WARN_ON(offset > sizeof(struct rtnl_link_stats64) || + offset % sizeof(u64) != 0); read_lock(&dev_base_lock); if (dev_isalive(dev)) { - const struct net_device_stats *stats = dev_get_stats(dev); - ret = sprintf(buf, fmt_ulong, - *(unsigned long *)(((u8 *) stats) + offset)); + const struct rtnl_link_stats64 *stats = dev_get_stats(dev); + ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset)); } read_unlock(&dev_base_lock); return ret; @@ -343,7 +343,7 @@ static ssize_t show_##name(struct device *d, \ struct device_attribute *attr, char *buf) \ { \ return netstat_show(d, attr, buf, \ - offsetof(struct net_device_stats, name)); \ + offsetof(struct rtnl_link_stats64, name)); \ } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a2af24..e645778 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -579,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - const struct net_device_stats *b) + const struct rtnl_link_stats64 *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -610,7 +610,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; } -static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) +static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) { struct rtnl_link_stats64 a; @@ -791,7 +791,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - const struct net_device_stats *stats; + const struct rtnl_link_stats64 *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); -- cgit v1.1 From 0b5c25e8ac3a60bd01a52ca7405ba96aec8c16be Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 8 Jun 2010 08:23:01 +0000 Subject: irttp: Print device parameters and statistics as unsigned Device statistics have type unsigned long and several of the device-specific parameters printed here have type __u32. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/irda/irttp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 47db1d8..285761e 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1853,23 +1853,23 @@ static int irttp_seq_show(struct seq_file *seq, void *v) self->remote_credit); seq_printf(seq, "send credit: %d\n", self->send_credit); - seq_printf(seq, " tx packets: %ld, ", + seq_printf(seq, " tx packets: %lu, ", self->stats.tx_packets); - seq_printf(seq, "rx packets: %ld, ", + seq_printf(seq, "rx packets: %lu, ", self->stats.rx_packets); - seq_printf(seq, "tx_queue len: %d ", + seq_printf(seq, "tx_queue len: %u ", skb_queue_len(&self->tx_queue)); - seq_printf(seq, "rx_queue len: %d\n", + seq_printf(seq, "rx_queue len: %u\n", skb_queue_len(&self->rx_queue)); seq_printf(seq, " tx_sdu_busy: %s, ", self->tx_sdu_busy? "TRUE":"FALSE"); seq_printf(seq, "rx_sdu_busy: %s\n", self->rx_sdu_busy? "TRUE":"FALSE"); - seq_printf(seq, " max_seg_size: %d, ", + seq_printf(seq, " max_seg_size: %u, ", self->max_seg_size); - seq_printf(seq, "tx_max_sdu_size: %d, ", + seq_printf(seq, "tx_max_sdu_size: %u, ", self->tx_max_sdu_size); - seq_printf(seq, "rx_max_sdu_size: %d\n", + seq_printf(seq, "rx_max_sdu_size: %u\n", self->rx_max_sdu_size); seq_printf(seq, " Used by (%s)\n\n", -- cgit v1.1 From e897082fe7a5b591dc4dd5599ac39081a7c8e482 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 13 Jun 2010 10:36:30 +0000 Subject: net: fix deliver_no_wcard regression on loopback device deliver_no_wcard is not being set in skb_copy_header. In the skb_cloned case it is not being cleared and may cause the skb to be dropped when the loopback device pushes it back up the stack. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Tested-by: Markus Trippelsdorf Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9f07e74..bcf2fa3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; + new->deliver_no_wcard = old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif -- cgit v1.1 From e8d15e6460cb0eea00f2574a80d94496943403ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 13 Jun 2010 10:50:46 +0000 Subject: net: rxhash already set in __copy_skb_header No need to copy rxhash again in __skb_clone() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bcf2fa3..34432b4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -570,7 +570,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); - C(rxhash); n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; n->cloned = 1; n->nohdr = 0; -- cgit v1.1 From f6bc7d9e4760324258ad5f5d147e79db8442842e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Jun 2010 04:39:27 +0000 Subject: ipv6: avoid two atomics in ipv6_rthdr_rcv() Use __in6_dev_get() instead of in6_dev_get()/in6_dev_put() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 853a633..262f105 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -312,6 +312,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) Routing header. ********************************/ +/* called with rcu_read_lock() */ static int ipv6_rthdr_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -324,12 +325,9 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int accept_source_route = net->ipv6.devconf_all->accept_source_route; - idev = in6_dev_get(skb->dev); - if (idev) { - if (accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; - in6_dev_put(idev); - } + idev = __in6_dev_get(skb->dev); + if (idev && accept_source_route > idev->cnf.accept_source_route) + accept_source_route = idev->cnf.accept_source_route; if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + -- cgit v1.1 From c68f24cc354050415c5ea543cd19ea5424463a2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Jun 2010 04:46:20 +0000 Subject: ipv6: RCU changes in ipv6_get_mtu() and ip6_dst_hoplimit() Use RCU to avoid atomic ops on idev refcnt in ipv6_get_mtu() and ip6_dst_hoplimit() Signed-off-by: Eric Dumazet Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/route.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f770285..8f2d040 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1084,11 +1084,11 @@ static int ipv6_get_mtu(struct net_device *dev) int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; - idev = in6_dev_get(dev); - if (idev) { + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) mtu = idev->cnf.mtu6; - in6_dev_put(idev); - } + rcu_read_unlock(); return mtu; } @@ -1097,12 +1097,15 @@ int ip6_dst_hoplimit(struct dst_entry *dst) int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); if (hoplimit < 0) { struct net_device *dev = dst->dev; - struct inet6_dev *idev = in6_dev_get(dev); - if (idev) { + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); - } else + else hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + rcu_read_unlock(); } return hoplimit; } -- cgit v1.1 From d6cc1d642de9284cb26488ea390d915b50ee2504 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Jun 2010 19:35:21 +0000 Subject: inetpeer: various changes Try to reduce cache line contentions in peer management, to reduce IP defragmentation overhead. - peer_fake_node is marked 'const' to make sure its not modified. (tested with CONFIG_DEBUG_RODATA=y) - Group variables in two structures to reduce number of dirtied cache lines. One named "peers" for avl tree root, its number of entries, and associated lock. (candidate for RCU conversion) - A second one named "unused_peers" for unused list and its lock - Add a !list_empty() test in unlink_from_unused() to avoid taking lock when entry is not unused. - Use atomic_dec_and_lock() in inet_putpeer() to avoid taking lock in some cases. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 94 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 6bcfe52..035673fd 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -70,17 +70,25 @@ static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height -static struct inet_peer peer_fake_node = { - .avl_left = &peer_fake_node, - .avl_right = &peer_fake_node, + +#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) +static const struct inet_peer peer_fake_node = { + .avl_left = peer_avl_empty, + .avl_right = peer_avl_empty, .avl_height = 0 }; -#define peer_avl_empty (&peer_fake_node) -static struct inet_peer *peer_root = peer_avl_empty; -static DEFINE_RWLOCK(peer_pool_lock); + +static struct { + struct inet_peer *root; + rwlock_t lock; + int total; +} peers = { + .root = peer_avl_empty, + .lock = __RW_LOCK_UNLOCKED(peers.lock), + .total = 0, +}; #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static int peer_total; /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more * aggressively at this stage */ @@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min int inet_peer_gc_mintime __read_mostly = 10 * HZ; int inet_peer_gc_maxtime __read_mostly = 120 * HZ; -static LIST_HEAD(unused_peers); -static DEFINE_SPINLOCK(inet_peer_unused_lock); +static struct { + struct list_head list; + spinlock_t lock; +} unused_peers = { + .list = LIST_HEAD_INIT(unused_peers.list), + .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), +}; static void peer_check_expire(unsigned long dummy); static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); @@ -131,9 +144,11 @@ void __init inet_initpeers(void) /* Called with or without local BH being disabled. */ static void unlink_from_unused(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - list_del_init(&p->unused); - spin_unlock_bh(&inet_peer_unused_lock); + if (!list_empty(&p->unused)) { + spin_lock_bh(&unused_peers.lock); + list_del_init(&p->unused); + spin_unlock_bh(&unused_peers.lock); + } } /* @@ -146,9 +161,9 @@ static void unlink_from_unused(struct inet_peer *p) struct inet_peer *u, **v; \ if (_stack != NULL) { \ stackptr = _stack; \ - *stackptr++ = &peer_root; \ + *stackptr++ = &peers.root; \ } \ - for (u = peer_root; u != peer_avl_empty; ) { \ + for (u = peers.root; u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ @@ -262,7 +277,7 @@ do { \ n->avl_right = peer_avl_empty; \ **--stackptr = n; \ peer_avl_rebalance(stack, stackptr); \ -} while(0) +} while (0) /* May be called with local BH enabled. */ static void unlink_from_pool(struct inet_peer *p) @@ -271,7 +286,7 @@ static void unlink_from_pool(struct inet_peer *p) do_free = 0; - write_lock_bh(&peer_pool_lock); + write_lock_bh(&peers.lock); /* Check the reference counter. It was artificially incremented by 1 * in cleanup() function to prevent sudden disappearing. If the * reference count is still 1 then the node is referenced only as `p' @@ -303,10 +318,10 @@ static void unlink_from_pool(struct inet_peer *p) delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); - peer_total--; + peers.total--; do_free = 1; } - write_unlock_bh(&peer_pool_lock); + write_unlock_bh(&peers.lock); if (do_free) kmem_cache_free(peer_cachep, p); @@ -326,16 +341,16 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *p = NULL; /* Remove the first entry from the list of unused nodes. */ - spin_lock_bh(&inet_peer_unused_lock); - if (!list_empty(&unused_peers)) { + spin_lock_bh(&unused_peers.lock); + if (!list_empty(&unused_peers.list)) { __u32 delta; - p = list_first_entry(&unused_peers, struct inet_peer, unused); + p = list_first_entry(&unused_peers.list, struct inet_peer, unused); delta = (__u32)jiffies - p->dtime; if (delta < ttl) { /* Do not prune fresh entries. */ - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); return -1; } @@ -345,7 +360,7 @@ static int cleanup_once(unsigned long ttl) * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); } - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); if (p == NULL) /* It means that the total number of USED entries has @@ -364,11 +379,11 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; /* Look up for the address quickly. */ - read_lock_bh(&peer_pool_lock); + read_lock_bh(&peers.lock); p = lookup(daddr, NULL); if (p != peer_avl_empty) atomic_inc(&p->refcnt); - read_unlock_bh(&peer_pool_lock); + read_unlock_bh(&peers.lock); if (p != peer_avl_empty) { /* The existing node has been found. */ @@ -390,7 +405,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) atomic_set(&n->ip_id_count, secure_ip_id(daddr)); n->tcp_ts_stamp = 0; - write_lock_bh(&peer_pool_lock); + write_lock_bh(&peers.lock); /* Check if an entry has suddenly appeared. */ p = lookup(daddr, stack); if (p != peer_avl_empty) @@ -399,10 +414,10 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) /* Link the node. */ link_to_pool(n); INIT_LIST_HEAD(&n->unused); - peer_total++; - write_unlock_bh(&peer_pool_lock); + peers.total++; + write_unlock_bh(&peers.lock); - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ cleanup_once(0); @@ -411,7 +426,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) out_free: /* The appropriate node is already in the pool. */ atomic_inc(&p->refcnt); - write_unlock_bh(&peer_pool_lock); + write_unlock_bh(&peers.lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); /* Free preallocated the preallocated node. */ @@ -425,12 +440,12 @@ static void peer_check_expire(unsigned long dummy) unsigned long now = jiffies; int ttl; - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) ttl = inet_peer_minttl; else ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; while (!cleanup_once(ttl)) { if (jiffies != now) break; @@ -439,22 +454,25 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). */ - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; else peer_periodic_timer.expires = jiffies + inet_peer_gc_maxtime - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } void inet_putpeer(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - if (atomic_dec_and_test(&p->refcnt)) { - list_add_tail(&p->unused, &unused_peers); + local_bh_disable(); + + if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { + list_add_tail(&p->unused, &unused_peers.list); p->dtime = (__u32)jiffies; + spin_unlock(&unused_peers.lock); } - spin_unlock_bh(&inet_peer_unused_lock); + + local_bh_enable(); } -- cgit v1.1