From aa8673599f1d269b4e4d9b0c0f61fca57bc02699 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Apr 2011 18:59:05 -0700 Subject: llc: Fix length check in llc_fixup_skb(). Fixes bugzilla #32872 The LLC stack pretends to support non-linear skbs but there is a direct use of skb_tail_pointer() in llc_fixup_skb(). Use pskb_may_pull() to see if data_size bytes remain and can be accessed linearly in the packet, instead of direct pointer checks. Signed-off-by: David S. Miller --- net/llc/llc_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 058f1e9..9032421 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -121,8 +121,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) s32 data_size = ntohs(pdulen) - llc_len; if (data_size < 0 || - ((skb_tail_pointer(skb) - - (u8 *)pdu) - llc_len) < data_size) + !pskb_may_pull(skb, data_size)) return 0; if (unlikely(pskb_trim_rcsum(skb, data_size))) return 0; -- cgit v1.1 From f8e9881c2aef1e982e5abc25c046820cd0b7cf64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2011 13:39:14 -0700 Subject: bridge: reset IPCB in br_parse_ip_options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 462fb2af9788a82 (bridge : Sanitize skb before it enters the IP stack), missed one IPCB init before calling ip_options_compile() Thanks to Scot Doyle for his tests and bug reports. Reported-by: Scot Doyle Signed-off-by: Eric Dumazet Cc: Hiroaki SHIMODA Acked-by: Bandan Das Acked-by: Stephen Hemminger Cc: Jan Lübbe Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 008ff6c..f3bc322 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -249,11 +249,9 @@ static int br_parse_ip_options(struct sk_buff *skb) goto drop; } - /* Zero out the CB buffer if no options present */ - if (iph->ihl == 5) { - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (iph->ihl == 5) return 0; - } opt->optlen = iph->ihl*4 - sizeof(struct iphdr); if (ip_options_compile(dev_net(dev), opt, skb)) -- cgit v1.1 From 66944e1c5797562cebe2d1857d46dff60bf9a69e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Apr 2011 22:39:40 +0000 Subject: inetpeer: reduce stack usage On 64bit arches, we use 752 bytes of stack when cleanup_once() is called from inet_getpeer(). Lets share the avl stack to save ~376 bytes. Before patch : # objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl 0x000006c3 unlink_from_pool [inetpeer.o]: 376 0x00000721 unlink_from_pool [inetpeer.o]: 376 0x00000cb1 inet_getpeer [inetpeer.o]: 376 0x00000e6d inet_getpeer [inetpeer.o]: 376 0x0004 inet_initpeers [inetpeer.o]: 112 # size net/ipv4/inetpeer.o text data bss dec hex filename 5320 432 21 5773 168d net/ipv4/inetpeer.o After patch : objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl 0x00000c11 inet_getpeer [inetpeer.o]: 376 0x00000dcd inet_getpeer [inetpeer.o]: 376 0x00000ab9 peer_check_expire [inetpeer.o]: 328 0x00000b7f peer_check_expire [inetpeer.o]: 328 0x0004 inet_initpeers [inetpeer.o]: 112 # size net/ipv4/inetpeer.o text data bss dec hex filename 5163 432 21 5616 15f0 net/ipv4/inetpeer.o Signed-off-by: Eric Dumazet Cc: Scot Doyle Cc: Stephen Hemminger Cc: Hiroaki SHIMODA Reviewed-by: Hiroaki SHIMODA Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index dd1b20e..9df4e63 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -354,7 +354,8 @@ static void inetpeer_free_rcu(struct rcu_head *head) } /* May be called with local BH enabled. */ -static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) +static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, + struct inet_peer __rcu **stack[PEER_MAXDEPTH]) { int do_free; @@ -368,7 +369,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) * We use refcnt=-1 to alert lockless readers this entry is deleted. */ if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { - struct inet_peer __rcu **stack[PEER_MAXDEPTH]; struct inet_peer __rcu ***stackptr, ***delp; if (lookup(&p->daddr, stack, base) != p) BUG(); @@ -422,7 +422,7 @@ static struct inet_peer_base *peer_to_base(struct inet_peer *p) } /* May be called with local BH enabled. */ -static int cleanup_once(unsigned long ttl) +static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) { struct inet_peer *p = NULL; @@ -454,7 +454,7 @@ static int cleanup_once(unsigned long ttl) * happen because of entry limits in route cache. */ return -1; - unlink_from_pool(p, peer_to_base(p)); + unlink_from_pool(p, peer_to_base(p), stack); return 0; } @@ -524,7 +524,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) if (base->total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ - cleanup_once(0); + cleanup_once(0, stack); return p; } @@ -540,6 +540,7 @@ static void peer_check_expire(unsigned long dummy) { unsigned long now = jiffies; int ttl, total; + struct inet_peer __rcu **stack[PEER_MAXDEPTH]; total = compute_total(); if (total >= inet_peer_threshold) @@ -548,7 +549,7 @@ static void peer_check_expire(unsigned long dummy) ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * total / inet_peer_threshold * HZ; - while (!cleanup_once(ttl)) { + while (!cleanup_once(ttl, stack)) { if (jiffies != now) break; } -- cgit v1.1 From 192910a6cca5e50e5bd6cbd1da0e7376c7adfe62 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 12 Apr 2011 13:59:33 -0700 Subject: net: Do not wrap sysctl igmp_max_memberships in IP_MULTICAST controlling igmp_max_membership is useful even when IP_MULTICAST is off. Quagga(an OSPF deamon) uses multicast addresses for all interfaces using a single socket and hits igmp_max_membership limit when there are 20 interfaces or more. Always export sysctl igmp_max_memberships in proc, just like igmp_max_msf Signed-off-by: Joakim Tjernlund Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1a45665..321e6e8 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -311,7 +311,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_do_large_bitmap, }, -#ifdef CONFIG_IP_MULTICAST { .procname = "igmp_max_memberships", .data = &sysctl_igmp_max_memberships, @@ -319,8 +318,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - -#endif { .procname = "igmp_max_msf", .data = &sysctl_igmp_max_msf, -- cgit v1.1 From 020318d0d2af51e0fd59ba654ede9b2171558720 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 12 Apr 2011 15:29:54 -0700 Subject: irda: fix locking unbalance in irda_sendmsg 5b40964eadea40509d353318d2c82e8b7bf5e8a5 ("irda: Remove BKL instances from af_irda.c") introduced a path where we have a locking unbalance. If we pass invalid flags, we unlock a socket we never locked, resulting in this... ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- trinity/20101 is trying to release lock (sk_lock-AF_IRDA) at: [] irda_sendmsg+0x207/0x21d [irda] but there are no more locks to release! other info that might help us debug this: no locks held by trinity/20101. stack backtrace: Pid: 20101, comm: trinity Not tainted 2.6.39-rc3+ #3 Call Trace: [] ? irda_sendmsg+0x207/0x21d [irda] [] print_unlock_inbalance_bug+0xc7/0xd2 [] ? irda_sendmsg+0x207/0x21d [irda] [] lock_release+0xcf/0x18e [] release_sock+0x2d/0x155 [] irda_sendmsg+0x207/0x21d [irda] [] __sock_sendmsg+0x69/0x75 [] sock_sendmsg+0xa1/0xb6 [] ? might_fault+0x5c/0xac [] ? lock_release+0x181/0x18e [] ? might_fault+0xa5/0xac [] ? might_fault+0x5c/0xac [] ? fcheck_files+0xb9/0xf0 [] ? copy_from_user+0x2f/0x31 [] ? verify_iovec+0x52/0xa6 [] sys_sendmsg+0x23a/0x2b8 [] ? lock_release+0x181/0x18e [] ? up_read+0x28/0x2c [] ? do_page_fault+0x360/0x3b4 [] ? trace_hardirqs_on_caller+0x10b/0x12f [] ? finish_task_switch+0xb2/0xe3 [] ? finish_task_switch+0x46/0xe3 [] ? trace_hardirqs_off_caller+0x33/0x90 [] ? retint_swapgs+0x13/0x1b [] ? trace_hardirqs_on_caller+0x10b/0x12f [] ? audit_syscall_entry+0x11c/0x148 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/irda/af_irda.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index c9890e2..cc61697 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1297,8 +1297,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | MSG_NOSIGNAL)) { - err = -EINVAL; - goto out; + return -EINVAL; } lock_sock(sk); -- cgit v1.1 From bfac3693c426d280b026f6a1b77dc2294ea43fea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 12 Apr 2011 15:33:23 -0700 Subject: ieee802154: Remove hacked CFLAGS in net/ieee802154/Makefile It adds -Wall (which the kernel carefully controls already) and of all things -DDEBUG (which should be set by other means if desired, please we have dynamic-debug these days). Kill this noise. Reported-by: Dave Jones Signed-off-by: David S. Miller --- net/ieee802154/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index ce2d335..5761185 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,5 +1,3 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o af_802154-y := af_ieee802154.o raw.o dgram.o - -ccflags-y += -Wall -DDEBUG -- cgit v1.1 From ea2d36883ca8e6caab23b6d15bfa80b1d1d81d2f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2011 14:38:37 +0000 Subject: net: Disable all TSO features when SG is disabled The feature flags NETIF_F_TSO and NETIF_F_TSO6 independently enable TSO for IPv4 and IPv6 respectively. However, the test in netdev_fix_features() and its predecessor functions was never updated to check for NETIF_F_TSO6, possibly because it was originally proposed that TSO for IPv6 would be dependent on both feature flags. Now that these feature flags can be changed independently from user-space and we depend on netdev_fix_features() to fix invalid feature combinations, it's important to disable them both if scatter-gather is disabled. Also disable NETIF_F_TSO_ECN so user-space sees all TSO features as disabled. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 956d3b0..6401fb5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5203,9 +5203,9 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } /* TSO requires that SG is present as well. */ - if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); - features &= ~NETIF_F_TSO; + if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping TSO features since no SG feature.\n"); + features &= ~NETIF_F_ALL_TSO; } /* Software GSO depends on SG. */ -- cgit v1.1 From 31d8b9e099e59f880aa65095951559896d4e20fa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2011 14:47:15 +0000 Subject: net: Disable NETIF_F_TSO_ECN when TSO is disabled NETIF_F_TSO_ECN has no effect when TSO is disabled; this just means that feature state will be accurately reported to user-space. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6401fb5..c2ac599 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5208,6 +5208,10 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) features &= ~NETIF_F_ALL_TSO; } + /* TSO ECN requires that TSO is present as well. */ + if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) + features &= ~NETIF_F_TSO_ECN; + /* Software GSO depends on SG. */ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); -- cgit v1.1 From 25f7bf7d0dfb460505cbe42676340e33100aca2e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 12 Apr 2011 15:20:48 +0000 Subject: sctp: fix oops when updating retransmit path with DEBUG on commit fbdf501c9374966a56829ecca3a7f25d2b49a305 sctp: Do no select unconfirmed transports for retransmissions Introduced the initial falt. commit d598b166ced20d9b9281ea3527c0e18405ddb803 sctp: Make sure we always return valid retransmit path Solved the problem, but forgot to change the DEBUG statement. Thus it was still possible to dereference a NULL pointer. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0698cad..922fdd7 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1323,6 +1323,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) if (t) asoc->peer.retran_path = t; + else + t = asoc->peer.retran_path; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" " %p addr: ", -- cgit v1.1 From 9494c7c5774d64a84a269aad38c153c4dbff97e6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 Apr 2011 15:22:22 +0000 Subject: sctp: fix oops while removed transport still using as retran path Since we can not update retran path to unconfirmed transports, when we remove a peer, the retran path may not be update if the other transports are all unconfirmed, and we will still using the removed transport as the retran path. This may cause panic if retrasnmit happen. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 922fdd7..1a21c57 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -569,6 +569,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, sctp_assoc_set_primary(asoc, transport); if (asoc->peer.active_path == peer) asoc->peer.active_path = transport; + if (asoc->peer.retran_path == peer) + asoc->peer.retran_path = transport; if (asoc->peer.last_data_from == peer) asoc->peer.last_data_from = transport; -- cgit v1.1 From 3e8c806a08c7beecd972e7ce15c570b9aba64baa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Apr 2011 12:01:14 -0700 Subject: Revert "tcp: disallow bind() to reuse addr/port" This reverts commit c191a836a908d1dd6b40c503741f91b914de3348. It causes known regressions for programs that expect to be able to use SO_REUSEADDR to shutdown a socket, then successfully rebind another socket to the same ID. Programs such as haproxy and amavisd expect this to work. This should fix kernel bugzilla 32832. Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 5 ++--- net/ipv6/inet6_connection_sock.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6c0b7f4..38f23e7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk, !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || - ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { + sk2->sk_state == TCP_LISTEN) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -122,8 +122,7 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && - !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { spin_unlock(&head->lock); snum = smallest_rover; goto have_snum; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 1660546..f2c5b0f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && (!sk->sk_reuse || !sk2->sk_reuse || - ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) && + sk2->sk_state == TCP_LISTEN) && ipv6_rcv_saddr_equal(sk, sk2)) break; } -- cgit v1.1 From c65353daf137dd41f3ede3baf62d561fca076228 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Apr 2011 05:55:37 +0000 Subject: ip: ip_options_compile() resilient to NULL skb route Scot Doyle demonstrated ip_options_compile() could be called with an skb without an attached route, using a setup involving a bridge, netfilter, and forged IP packets. Let's make ip_options_compile() and ip_options_rcv_srr() a bit more robust, instead of changing bridge/netfilter code. With help from Hiroaki SHIMODA. Reported-by: Scot Doyle Tested-by: Scot Doyle Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Acked-by: Hiroaki SHIMODA Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 28a736f..2391b24 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -329,7 +329,7 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - if (skb) { + if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); opt->is_changed = 1; } @@ -371,7 +371,7 @@ int ip_options_compile(struct net *net, goto error; } opt->ts = optptr - iph; - if (skb) { + if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); timeptr = (__be32*)&optptr[optptr[2]+3]; } @@ -603,7 +603,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned long orefdst; int err; - if (!opt->srr) + if (!opt->srr || !rt) return 0; if (skb->pkt_type != PACKET_HOST) -- cgit v1.1