From 875ad3f8e7dff6bc1d053e5bfe73d8e8d2e6ae67 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 23 Jan 2012 12:49:36 -0500
Subject: SUNRPC: Fix machine creds in generic_create_cred and generic_match

- generic_create_cred needs to copy the '.principal' field.
- generic_match needs to ignore the groups and match on the '.principal'
  field.

This fixes an Oops that was introduced by commit 68c9715 (SUNRPC:
Clean up the RPCSEC_GSS service ticket requests)

Reported-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_generic.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 1426ec3..75762f3 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -92,6 +92,7 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	if (gcred->acred.group_info != NULL)
 		get_group_info(gcred->acred.group_info);
 	gcred->acred.machine_cred = acred->machine_cred;
+	gcred->acred.principal = acred->principal;
 
 	dprintk("RPC:       allocated %s cred %p for uid %d gid %d\n",
 			gcred->acred.machine_cred ? "machine" : "generic",
@@ -123,6 +124,17 @@ generic_destroy_cred(struct rpc_cred *cred)
 	call_rcu(&cred->cr_rcu, generic_free_cred_callback);
 }
 
+static int
+machine_cred_match(struct auth_cred *acred, struct generic_cred *gcred, int flags)
+{
+	if (!gcred->acred.machine_cred ||
+	    gcred->acred.principal != acred->principal ||
+	    gcred->acred.uid != acred->uid ||
+	    gcred->acred.gid != acred->gid)
+		return 0;
+	return 1;
+}
+
 /*
  * Match credentials against current process creds.
  */
@@ -132,9 +144,12 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
 	struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
 	int i;
 
+	if (acred->machine_cred)
+		return machine_cred_match(acred, gcred, flags);
+
 	if (gcred->acred.uid != acred->uid ||
 	    gcred->acred.gid != acred->gid ||
-	    gcred->acred.machine_cred != acred->machine_cred)
+	    gcred->acred.machine_cred != 0)
 		goto out_nomatch;
 
 	/* Optimisation in the case where pointers are identical... */
-- 
cgit v1.1


From ba1960257c5980f9b58057995ce3394bd8e48ca3 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Tue, 10 Jan 2012 15:19:54 +0200
Subject: mac80211: update oper_channel on ibss join

Commit 13c40c5 ("mac80211: Add HT operation modes for IBSS") broke
ibss operation by mistakenly removing the local->oper_channel
update (causing ibss to start on the wrong channel). fix it.

Signed-off-by: Eliad Peller <eliad@wizery.com>
Acked-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index b3d76b7..a464396 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -106,6 +106,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 
 	sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
 
+	local->oper_channel = chan;
 	channel_type = ifibss->channel_type;
 	if (channel_type > NL80211_CHAN_HT20 &&
 	    !cfg80211_can_beacon_sec_chan(local->hw.wiphy, chan, channel_type))
-- 
cgit v1.1


From 405385f8ce7a2ed8f82e216d88b5282142e1288b Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 11 Jan 2012 13:11:50 +0200
Subject: mac80211: set bss_conf.idle when vif is connected

__ieee80211_recalc_idle() iterates through the vifs,
sets bss_conf.idle = true if they are disconnected,
and increases "count" if they are not (which later
gets evaluated in order to determine whether the
device is idle).

However, the loop doesn't set bss_conf.idle = false
(along with increasing "count"), causing the device
idle state and the vif idle state to get out of sync
in some cases.

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e47768c..01a21c2 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1314,6 +1314,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 			continue;
 		}
 		/* count everything else */
+		sdata->vif.bss_conf.idle = false;
 		count++;
 	}
 
-- 
cgit v1.1


From 68315801dbf3ab2001679fd2074c9dc5dcf87dfa Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Wed, 25 Jan 2012 02:39:05 +0000
Subject: l2tp: l2tp_ip - fix possible oops on packet receive

When a packet is received on an L2TP IP socket (L2TPv3 IP link
encapsulation), the l2tpip socket's backlog_rcv function calls
xfrm4_policy_check(). This is not necessary, since it was called
before the skb was added to the backlog. With CONFIG_NET_NS enabled,
xfrm4_policy_check() will oops if skb->dev is null, so this trivial
patch removes the call.

This bug has always been present, but only when CONFIG_NET_NS is
enabled does it cause problems. Most users are probably using UDP
encapsulation for L2TP, hence the problem has only recently
surfaced.

EIP: 0060:[<c12bb62b>] EFLAGS: 00210246 CPU: 0
EIP is at l2tp_ip_recvmsg+0xd4/0x2a7
EAX: 00000001 EBX: d77b5180 ECX: 00000000 EDX: 00200246
ESI: 00000000 EDI: d63cbd30 EBP: d63cbd18 ESP: d63cbcf4
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Call Trace:
 [<c1218568>] sock_common_recvmsg+0x31/0x46
 [<c1215c92>] __sock_recvmsg_nosec+0x45/0x4d
 [<c12163a1>] __sock_recvmsg+0x31/0x3b
 [<c1216828>] sock_recvmsg+0x96/0xab
 [<c10b2693>] ? might_fault+0x47/0x81
 [<c10b2693>] ? might_fault+0x47/0x81
 [<c1167fd0>] ? _copy_from_user+0x31/0x115
 [<c121e8c8>] ? copy_from_user+0x8/0xa
 [<c121ebd6>] ? verify_iovec+0x3e/0x78
 [<c1216604>] __sys_recvmsg+0x10a/0x1aa
 [<c1216792>] ? sock_recvmsg+0x0/0xab
 [<c105a99b>] ? __lock_acquire+0xbdf/0xbee
 [<c12d5a99>] ? do_page_fault+0x193/0x375
 [<c10d1200>] ? fcheck_files+0x9b/0xca
 [<c10d1259>] ? fget_light+0x2a/0x9c
 [<c1216bbb>] sys_recvmsg+0x2b/0x43
 [<c1218145>] sys_socketcall+0x16d/0x1a5
 [<c11679f0>] ? trace_hardirqs_on_thunk+0xc/0x10
 [<c100305f>] sysenter_do_call+0x12/0x38
Code: c6 05 8c ea a8 c1 01 e8 0c d4 d9 ff 85 f6 74 07 3e ff 86 80 00 00 00 b9 17 b6 2b c1 ba 01 00 00 00 b8 78 ed 48 c1 e8 23 f6 d9 ff <ff> 76 0c 68 28 e3 30 c1 68 2d 44 41 c1 e8 89 57 01 00 83 c4 0c

Signed-off-by: James Chapman <jchapman@katalix.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index d21e7eb..55670ec 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -393,11 +393,6 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
 {
 	int rc;
 
-	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
-		goto drop;
-
-	nf_reset(skb);
-
 	/* Charge it to the socket, dropping if the queue is full. */
 	rc = sock_queue_rcv_skb(sk, skb);
 	if (rc < 0)
-- 
cgit v1.1


From 2b05ad33e1e624e7f08b8676d270dc7725403b7e Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@redhat.com>
Date: Wed, 25 Jan 2012 08:34:51 +0000
Subject: tcp: bind() fix autoselection to share ports

The current code checks for conflicts when the application
requests a specific port.  If there is no conflict, then
the request is granted.

On the other hand, the port autoselection done by the kernel
fails when all ports are bound even when there is a port
with no conflict available.

The fix changes port autoselection to check if there is a
conflict and use it if not.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 2e4e244..ecd19b5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -128,6 +128,11 @@ again:
 							goto have_snum;
 						}
 					}
+					if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+						spin_unlock(&head->lock);
+						snum = rover;
+						goto have_snum;
+					}
 					goto next;
 				}
 			break;
-- 
cgit v1.1


From fddb7b5761f104f034a0e708ece756d9b2eb2cac Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@redhat.com>
Date: Wed, 25 Jan 2012 08:34:52 +0000
Subject: tcp: bind() optimize port allocation

Port autoselection finds a port and then drop the lock,
then right after that, gets the hash bucket again and lock it.

Fix it to go direct.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ecd19b5..19d66ce 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -123,15 +123,13 @@ again:
 						smallest_size = tb->num_owners;
 						smallest_rover = rover;
 						if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
-							spin_unlock(&head->lock);
 							snum = smallest_rover;
-							goto have_snum;
+							goto tb_found;
 						}
 					}
 					if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
-						spin_unlock(&head->lock);
 						snum = rover;
-						goto have_snum;
+						goto tb_found;
 					}
 					goto next;
 				}
-- 
cgit v1.1


From 073862ba5d249c20bd5c49fc6d904ff0e1f6a672 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 26 Jan 2012 00:41:38 +0000
Subject: netns: fix net_alloc_generic()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a new net namespace is created, we should attach to it a "struct
net_generic" with enough slots (even empty), or we can hit the following
BUG_ON() :

[  200.752016] kernel BUG at include/net/netns/generic.h:40!
...
[  200.752016]  [<ffffffff825c3cea>] ? get_cfcnfg+0x3a/0x180
[  200.752016]  [<ffffffff821cf0b0>] ? lockdep_rtnl_is_held+0x10/0x20
[  200.752016]  [<ffffffff825c41be>] caif_device_notify+0x2e/0x530
[  200.752016]  [<ffffffff810d61b7>] notifier_call_chain+0x67/0x110
[  200.752016]  [<ffffffff810d67c1>] raw_notifier_call_chain+0x11/0x20
[  200.752016]  [<ffffffff821bae82>] call_netdevice_notifiers+0x32/0x60
[  200.752016]  [<ffffffff821c2b26>] register_netdevice+0x196/0x300
[  200.752016]  [<ffffffff821c2ca9>] register_netdev+0x19/0x30
[  200.752016]  [<ffffffff81c1c67a>] loopback_net_init+0x4a/0xa0
[  200.752016]  [<ffffffff821b5e62>] ops_init+0x42/0x180
[  200.752016]  [<ffffffff821b600b>] setup_net+0x6b/0x100
[  200.752016]  [<ffffffff821b6466>] copy_net_ns+0x86/0x110
[  200.752016]  [<ffffffff810d5789>] create_new_namespaces+0xd9/0x190

net_alloc_generic() should take into account the maximum index into the
ptr array, as a subsystem might use net_generic() anytime.

This also reduces number of reallocations in net_assign_generic()

Reported-by: Sasha Levin <levinsasha928@gmail.com>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Sjur Brændeland <sjur.brandeland@stericsson.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index aefcd7a..0e950fd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -30,6 +30,20 @@ EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
 
+static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
+
+static struct net_generic *net_alloc_generic(void)
+{
+	struct net_generic *ng;
+	size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+	ng = kzalloc(generic_size, GFP_KERNEL);
+	if (ng)
+		ng->len = max_gen_ptrs;
+
+	return ng;
+}
+
 static int net_assign_generic(struct net *net, int id, void *data)
 {
 	struct net_generic *ng, *old_ng;
@@ -43,8 +57,7 @@ static int net_assign_generic(struct net *net, int id, void *data)
 	if (old_ng->len >= id)
 		goto assign;
 
-	ng = kzalloc(sizeof(struct net_generic) +
-			id * sizeof(void *), GFP_KERNEL);
+	ng = net_alloc_generic();
 	if (ng == NULL)
 		return -ENOMEM;
 
@@ -59,7 +72,6 @@ static int net_assign_generic(struct net *net, int id, void *data)
 	 * the old copy for kfree after a grace period.
 	 */
 
-	ng->len = id;
 	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
 
 	rcu_assign_pointer(net->gen, ng);
@@ -161,18 +173,6 @@ out_undo:
 	goto out;
 }
 
-static struct net_generic *net_alloc_generic(void)
-{
-	struct net_generic *ng;
-	size_t generic_size = sizeof(struct net_generic) +
-		INITIAL_NET_GEN_PTRS * sizeof(void *);
-
-	ng = kzalloc(generic_size, GFP_KERNEL);
-	if (ng)
-		ng->len = INITIAL_NET_GEN_PTRS;
-
-	return ng;
-}
 
 #ifdef CONFIG_NET_NS
 static struct kmem_cache *net_cachep;
@@ -483,6 +483,7 @@ again:
 			}
 			return error;
 		}
+		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
 	}
 	error = __register_pernet_operations(list, ops);
 	if (error) {
-- 
cgit v1.1


From f2b3ee9e4200b32d113b1bd3c93f9a836c97357c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 26 Jan 2012 10:34:35 +0000
Subject: ipv6: Fix ip_gre lockless xmits.

Tunnel devices set NETIF_F_LLTX to bypass HARD_TX_LOCK.  Sit and
ipip set this unconditionally in ops->setup, but gre enables it
conditionally after parameter passing in ops->newlink. This is
not called during tunnel setup as below, however, so GRE tunnels are
still taking the lock.

modprobe ip_gre
ip tunnel add test0 mode gre remote 10.5.1.1 dev lo
ip link set test0 up
ip addr add 10.6.0.1 dev test0
 # cat /sys/class/net/test0/features
 # $DIR/test_tunnel_xmit 10 10.5.2.1
ip route add 10.5.2.0/24 dev test0
ip tunnel del test0

The newlink callback is only called in rtnl_netlink, and only if
the device is new, as it calls register_netdevice internally. Gre
tunnels are created at 'ip tunnel add' with ioctl SIOCADDTUNNEL,
which calls ipgre_tunnel_locate, which calls register_netdev.
rtnl_newlink is called at 'ip link set', but skips ops->newlink
and the device is up with locking still enabled. The equivalent
ipip tunnel works fine, btw (just substitute 'method gre' for
'method ipip').

On kernels before /sys/class/net/*/features was removed [1],
the first commented out line returns 0x6000 with method gre,
which indicates that NETIF_F_LLTX (0x1000) is not set. With ipip,
it reports 0x7000. This test cannot be used on recent kernels where
the sysfs file is removed (and ETHTOOL_GFEATURES does not currently
work for tunnel devices, because they lack dev->ethtool_ops).

The second commented out line calls a simple transmission test [2]
that sends on 24 cores at maximum rate. Results of a single run:

ipip:			19,372,306
gre before patch:	 4,839,753
gre after patch:	19,133,873

This patch replicates the condition check in ipgre_newlink to
ipgre_tunnel_locate. It works for me, both with oseq on and off.
This is the first time I looked at rtnetlink and iproute2 code,
though, so someone more knowledgeable should probably check the
patch. Thanks.

The tail of both functions is now identical, by the way. To avoid
code duplication, I'll be happy to rework this and merge the two.

[1] http://patchwork.ozlabs.org/patch/104610/
[2] http://kernel.googlecode.com/files/xmit_udp_parallel.c

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2b53a1f..6b3ca5b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -422,6 +422,10 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
 	if (register_netdevice(dev) < 0)
 		goto failed_free;
 
+	/* Can use a lockless transmit, unless we generate output sequences */
+	if (!(nt->parms.o_flags & GRE_SEQ))
+		dev->features |= NETIF_F_LLTX;
+
 	dev_hold(dev);
 	ipgre_tunnel_link(ign, nt);
 	return nt;
-- 
cgit v1.1


From f18da14565819ba43b8321237e2426a2914cc2ef Mon Sep 17 00:00:00 2001
From: Stefan Gula <steweg@gmail.com>
Date: Thu, 26 Jan 2012 11:01:06 +0000
Subject: net: RTNETLINK adjusting values of min_ifinfo_dump_size

Setting link parameters on a netdevice changes the value
of if_nlmsg_size(), therefore it is necessary to recalculate
min_ifinfo_dump_size.

Signed-off-by: Stefan Gula <steweg@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f16444b..65aebd4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1509,6 +1509,9 @@ errout:
 
 	if (send_addr_notify)
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	min_ifinfo_dump_size = max_t(u16, if_nlmsg_size(dev),
+				     min_ifinfo_dump_size);
+
 	return err;
 }
 
-- 
cgit v1.1


From 8a8ee9aff6c3077dd9c2c7a77478e8ed362b96c6 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Jan 2012 14:04:53 +0000
Subject: net caif: Register properly as a pernet subsystem.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

caif is a subsystem and as such it needs to register with
register_pernet_subsys instead of register_pernet_device.

Among other problems using register_pernet_device was resulting in
net_generic being called before the caif_net structure was allocated.
Which has been causing net_generic to fail with either BUG_ON's or by
return NULL pointers.

A more ugly problem that could be caused is packets in flight why the
subsystem is shutting down.

To remove confusion also remove the cruft cause by inappropriately
trying to fix this bug.

With the aid of the previous patch I have tested this patch and
confirmed that using register_pernet_subsys makes the failure go away as
it should.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_dev.c | 22 ++--------------------
 net/caif/cfcnfg.c   |  1 -
 2 files changed, 2 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 673728a..82c5706 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -59,8 +59,6 @@ struct cfcnfg *get_cfcnfg(struct net *net)
 {
 	struct caif_net *caifn;
 	caifn = net_generic(net, caif_net_id);
-	if (!caifn)
-		return NULL;
 	return caifn->cfg;
 }
 EXPORT_SYMBOL(get_cfcnfg);
@@ -69,8 +67,6 @@ static struct caif_device_entry_list *caif_device_list(struct net *net)
 {
 	struct caif_net *caifn;
 	caifn = net_generic(net, caif_net_id);
-	if (!caifn)
-		return NULL;
 	return &caifn->caifdevs;
 }
 
@@ -99,8 +95,6 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
 	struct caif_device_entry *caifd;
 
 	caifdevs = caif_device_list(dev_net(dev));
-	if (!caifdevs)
-		return NULL;
 
 	caifd = kzalloc(sizeof(*caifd), GFP_KERNEL);
 	if (!caifd)
@@ -120,8 +114,6 @@ static struct caif_device_entry *caif_get(struct net_device *dev)
 	struct caif_device_entry_list *caifdevs =
 	    caif_device_list(dev_net(dev));
 	struct caif_device_entry *caifd;
-	if (!caifdevs)
-		return NULL;
 
 	list_for_each_entry_rcu(caifd, &caifdevs->list, list) {
 		if (caifd->netdev == dev)
@@ -321,8 +313,6 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 	struct caif_device_entry_list *caifdevs;
 
 	caifdevs = caif_device_list(dev_net(dev));
-	if (!cfg || !caifdevs)
-		return;
 	caifd = caif_device_alloc(dev);
 	if (!caifd)
 		return;
@@ -374,8 +364,6 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 
 	cfg = get_cfcnfg(dev_net(dev));
 	caifdevs = caif_device_list(dev_net(dev));
-	if (!cfg || !caifdevs)
-		return 0;
 
 	caifd = caif_get(dev);
 	if (caifd == NULL && dev->type != ARPHRD_CAIF)
@@ -507,9 +495,6 @@ static struct notifier_block caif_device_notifier = {
 static int caif_init_net(struct net *net)
 {
 	struct caif_net *caifn = net_generic(net, caif_net_id);
-	if (WARN_ON(!caifn))
-		return -EINVAL;
-
 	INIT_LIST_HEAD(&caifn->caifdevs.list);
 	mutex_init(&caifn->caifdevs.lock);
 
@@ -527,9 +512,6 @@ static void caif_exit_net(struct net *net)
 	    caif_device_list(net);
 	struct cfcnfg *cfg =  get_cfcnfg(net);
 
-	if (!cfg || !caifdevs)
-		return;
-
 	rtnl_lock();
 	mutex_lock(&caifdevs->lock);
 
@@ -569,7 +551,7 @@ static int __init caif_device_init(void)
 {
 	int result;
 
-	result = register_pernet_device(&caif_net_ops);
+	result = register_pernet_subsys(&caif_net_ops);
 
 	if (result)
 		return result;
@@ -582,7 +564,7 @@ static int __init caif_device_init(void)
 
 static void __exit caif_device_exit(void)
 {
-	unregister_pernet_device(&caif_net_ops);
+	unregister_pernet_subsys(&caif_net_ops);
 	unregister_netdevice_notifier(&caif_device_notifier);
 	dev_remove_pack(&caif_packet_type);
 }
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 598aafb..ba9cfd4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -309,7 +309,6 @@ int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,
 	int err;
 	struct cfctrl_link_param param;
 	struct cfcnfg *cfg = get_cfcnfg(net);
-	caif_assert(cfg != NULL);
 
 	rcu_read_lock();
 	err = caif_connect_req_to_link_param(cfg, conn_req, &param);
-- 
cgit v1.1


From 4acb41903b2f99f3dffd4c3df9acc84ca5942cb2 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Mon, 30 Jan 2012 01:20:17 +0000
Subject: net/tcp: Fix tcp memory limits initialization when !CONFIG_SYSCTL

sysctl_tcp_mem() initialization was moved to sysctl_tcp_ipv4.c
in commit 3dc43e3e4d0b52197d3205214fe8f162f9e0c334, since it
became a per-ns value.

That code, however, will never run when CONFIG_SYSCTL is
disabled, leading to bogus values on those fields - causing hung
TCP sockets.

This patch fixes it by keeping an initialization code in
tcp_init(). It will be overwritten by the first net namespace
init if CONFIG_SYSCTL is compiled in, and do the right thing if
it is compiled out.

It is also named properly as tcp_init_mem(), to properly signal
its non-sysctl side effect on TCP limits.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: David S. Miller <davem@davemloft.net>
Link: http://lkml.kernel.org/r/4F22D05A.8030604@parallels.com
[ renamed the function, tidied up the changelog a bit ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c |  1 +
 net/ipv4/tcp.c             | 16 +++++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4aa7e9d..4cb9cd2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -814,6 +814,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 
 	net->ipv4.sysctl_rt_cache_rebuild_count = 4;
 
+	tcp_init_mem(net);
 	limit = nr_free_buffer_pages() / 8;
 	limit = max(limit, 128UL);
 	net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9bcdec3..06373b4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3216,6 +3216,16 @@ static int __init set_thash_entries(char *str)
 }
 __setup("thash_entries=", set_thash_entries);
 
+void tcp_init_mem(struct net *net)
+{
+	/* Set per-socket limits to no more than 1/128 the pressure threshold */
+	unsigned long limit = nr_free_buffer_pages() / 8;
+	limit = max(limit, 128UL);
+	net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
+	net->ipv4.sysctl_tcp_mem[1] = limit;
+	net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
+}
+
 void __init tcp_init(void)
 {
 	struct sk_buff *skb = NULL;
@@ -3276,9 +3286,9 @@ void __init tcp_init(void)
 	sysctl_tcp_max_orphans = cnt / 2;
 	sysctl_max_syn_backlog = max(128, cnt / 256);
 
-	/* Set per-socket limits to no more than 1/128 the pressure threshold */
-	limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
-		<< (PAGE_SHIFT - 7);
+	tcp_init_mem(&init_net);
+	limit = nr_free_buffer_pages() / 8;
+	limit = max(limit, 128UL);
 	max_share = min(4UL*1024*1024, limit);
 
 	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
-- 
cgit v1.1


From 5b35e1e6e9ca651e6b291c96d1106043c9af314a Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 28 Jan 2012 17:29:46 +0000
Subject: tcp: fix tcp_trim_head() to adjust segment count with skb MSS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit fixes tcp_trim_head() to recalculate the number of
segments in the skb with the skb's existing MSS, so trimming the head
causes the skb segment count to be monotonically non-increasing - it
should stay the same or go down, but not increase.

Previously tcp_trim_head() used the current MSS of the connection. But
if there was a decrease in MSS between original transmission and ACK
(e.g. due to PMTUD), this could cause tcp_trim_head() to
counter-intuitively increase the segment count when trimming bytes off
the head of an skb. This violated assumptions in tcp_tso_acked() that
tcp_trim_head() only decreases the packet count, so that packets_acked
in tcp_tso_acked() could underflow, leading tcp_clean_rtx_queue() to
pass u32 pkts_acked values as large as 0xffffffff to
ca_ops->pkts_acked().

As an aside, if tcp_trim_head() had really wanted the skb to reflect
the current MSS, it should have called tcp_set_skb_tso_segs()
unconditionally, since a decrease in MSS would mean that a
single-packet skb should now be sliced into multiple segments.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8c8de27..4ff3b6d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1141,11 +1141,9 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	sk_mem_uncharge(sk, len);
 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 
-	/* Any change of skb->len requires recalculation of tso
-	 * factor and mss.
-	 */
+	/* Any change of skb->len requires recalculation of tso factor. */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
+		tcp_set_skb_tso_segs(sk, skb, tcp_skb_mss(skb));
 
 	return 0;
 }
-- 
cgit v1.1


From 6f01fd6e6f6809061b56e78f1e8d143099716d70 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 28 Jan 2012 16:11:03 +0000
Subject: af_unix: fix EPOLLET regression for stream sockets

Commit 0884d7aa24 (AF_UNIX: Fix poll blocking problem when reading from
a stream socket) added a regression for epoll() in Edge Triggered mode
(EPOLLET)

Appropriate fix is to use skb_peek()/skb_unlink() instead of
skb_dequeue(), and only call skb_unlink() when skb is fully consumed.

This remove the need to requeue a partial skb into sk_receive_queue head
and the extra sk->sk_data_ready() calls that added the regression.

This is safe because once skb is given to sk_receive_queue, it is not
modified by a writer, and readers are serialized by u->readlock mutex.

This also reduce number of spinlock acquisition for small reads or
MSG_PEEK users so should improve overall performance.

Reported-by: Nick Mathewson <nickm@freehaven.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Alexey Moiseytsev <himeraster@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aad8fb6..85d3bb7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1918,7 +1918,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		struct sk_buff *skb;
 
 		unix_state_lock(sk);
-		skb = skb_dequeue(&sk->sk_receive_queue);
+		skb = skb_peek(&sk->sk_receive_queue);
 		if (skb == NULL) {
 			unix_sk(sk)->recursion_level = 0;
 			if (copied >= target)
@@ -1958,11 +1958,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		if (check_creds) {
 			/* Never glue messages from different writers */
 			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
-			    (UNIXCB(skb).cred != siocb->scm->cred)) {
-				skb_queue_head(&sk->sk_receive_queue, skb);
-				sk->sk_data_ready(sk, skb->len);
+			    (UNIXCB(skb).cred != siocb->scm->cred))
 				break;
-			}
 		} else {
 			/* Copy credentials */
 			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
@@ -1977,8 +1974,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		chunk = min_t(unsigned int, skb->len, size);
 		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			sk->sk_data_ready(sk, skb->len);
 			if (copied == 0)
 				copied = -EFAULT;
 			break;
@@ -1993,13 +1988,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			if (UNIXCB(skb).fp)
 				unix_detach_fds(siocb->scm, skb);
 
-			/* put the skb back if we didn't use it up.. */
-			if (skb->len) {
-				skb_queue_head(&sk->sk_receive_queue, skb);
-				sk->sk_data_ready(sk, skb->len);
+			if (skb->len)
 				break;
-			}
 
+			skb_unlink(skb, &sk->sk_receive_queue);
 			consume_skb(skb);
 
 			if (siocb->scm->fp)
@@ -2010,9 +2002,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			if (UNIXCB(skb).fp)
 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
 
-			/* put message back and return */
-			skb_queue_head(&sk->sk_receive_queue, skb);
-			sk->sk_data_ready(sk, skb->len);
 			break;
 		}
 	} while (size);
-- 
cgit v1.1


From efcdbf24fd5daa88060869e51ed49f68b7ac8708 Mon Sep 17 00:00:00 2001
From: Arun Sharma <asharma@fb.com>
Date: Mon, 30 Jan 2012 14:16:06 -0800
Subject: net: Disambiguate kernel message

Some of our machines were reporting:

TCP: too many of orphaned sockets

even when the number of orphaned sockets was well below the
limit.

We print a different message depending on whether we're out
of TCP memory or there are too many orphaned sockets.

Also move the check out of line and cleanup the messages
that were printed.

Signed-off-by: Arun Sharma <asharma@fb.com>
Suggested-by: Mohan Srinivasan <mohan@fb.com>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: David Miller <davem@davemloft.net>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c       | 19 +++++++++++++++----
 net/ipv4/tcp_timer.c |  5 +----
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 06373b4..a34f5cf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1876,6 +1876,20 @@ void tcp_shutdown(struct sock *sk, int how)
 }
 EXPORT_SYMBOL(tcp_shutdown);
 
+bool tcp_check_oom(struct sock *sk, int shift)
+{
+	bool too_many_orphans, out_of_socket_memory;
+
+	too_many_orphans = tcp_too_many_orphans(sk, shift);
+	out_of_socket_memory = tcp_out_of_memory(sk);
+
+	if (too_many_orphans && net_ratelimit())
+		pr_info("TCP: too many orphaned sockets\n");
+	if (out_of_socket_memory && net_ratelimit())
+		pr_info("TCP: out of memory -- consider tuning tcp_mem\n");
+	return too_many_orphans || out_of_socket_memory;
+}
+
 void tcp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
@@ -2015,10 +2029,7 @@ adjudge_to_death:
 	}
 	if (sk->sk_state != TCP_CLOSE) {
 		sk_mem_reclaim(sk);
-		if (tcp_too_many_orphans(sk, 0)) {
-			if (net_ratelimit())
-				printk(KERN_INFO "TCP: too many of orphaned "
-				       "sockets\n");
+		if (tcp_check_oom(sk, 0)) {
 			tcp_set_state(sk, TCP_CLOSE);
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			NET_INC_STATS_BH(sock_net(sk),
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a516d1e..cd2e072 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -77,10 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
 	if (sk->sk_err_soft)
 		shift++;
 
-	if (tcp_too_many_orphans(sk, shift)) {
-		if (net_ratelimit())
-			printk(KERN_INFO "Out of socket memory\n");
-
+	if (tcp_check_oom(sk, shift)) {
 		/* Catch exceptional cases, when connection requires reset.
 		 *      1. Last segment was sent recently. */
 		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
-- 
cgit v1.1


From 786f528119722f564a22ad953411374e06116333 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 1 Feb 2012 09:32:25 +0000
Subject: ethtool: Null-terminate filename passed to ethtool_ops::flash_device

The parameters for ETHTOOL_FLASHDEV include a filename, which ought to
be null-terminated.  Currently the only driver that implements
ethtool_ops::flash_device attempts to add a null terminator if
necessary, but does it wrongly.  Do it in the ethtool core instead.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 369b418..3f79db1 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1190,6 +1190,8 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
 	if (!dev->ethtool_ops->flash_device)
 		return -EOPNOTSUPP;
 
+	efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
+
 	return dev->ethtool_ops->flash_device(dev, &efl);
 }
 
-- 
cgit v1.1


From 07ae2dfcf4f7143ce191c6436da1c33f179af0d6 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 1 Feb 2012 18:48:09 +0200
Subject: mac80211: timeout a single frame in the rx reorder buffer

The current code checks for stored_mpdu_num > 1, causing
the reorder_timer to be triggered indefinitely, but the
frame is never timed-out (until the next packet is received)

Signed-off-by: Eliad Peller <eliad@wizery.com>
Cc: <stable@vger.kernel.org>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7514091..5a5e504 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -611,7 +611,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
 	index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
 						tid_agg_rx->buf_size;
 	if (!tid_agg_rx->reorder_buf[index] &&
-	    tid_agg_rx->stored_mpdu_num > 1) {
+	    tid_agg_rx->stored_mpdu_num) {
 		/*
 		 * No buffers ready to be released, but check whether any
 		 * frames in the reorder buffer have timed out.
-- 
cgit v1.1


From c43b874d5d714f271b80d4c3f49e05d0cbf51ed2 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 2 Feb 2012 00:07:00 +0000
Subject: tcp: properly initialize tcp memory limits

Commit 4acb4190 tries to fix the using uninitialized value
introduced by commit 3dc43e3,  but it would make the
per-socket memory limits too small.

This patch fixes this and also remove the redundant codes
introduced in 4acb4190.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 6 ------
 net/ipv4/tcp.c             | 4 ++--
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4cb9cd2..7a7724d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -778,7 +778,6 @@ EXPORT_SYMBOL_GPL(net_ipv4_ctl_path);
 static __net_init int ipv4_sysctl_init_net(struct net *net)
 {
 	struct ctl_table *table;
-	unsigned long limit;
 
 	table = ipv4_net_table;
 	if (!net_eq(net, &init_net)) {
@@ -815,11 +814,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 	net->ipv4.sysctl_rt_cache_rebuild_count = 4;
 
 	tcp_init_mem(net);
-	limit = nr_free_buffer_pages() / 8;
-	limit = max(limit, 128UL);
-	net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
-	net->ipv4.sysctl_tcp_mem[1] = limit;
-	net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
 
 	net->ipv4.ipv4_hdr = register_net_sysctl_table(net,
 			net_ipv4_ctl_path, table);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a34f5cf..37755cc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3229,7 +3229,6 @@ __setup("thash_entries=", set_thash_entries);
 
 void tcp_init_mem(struct net *net)
 {
-	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	unsigned long limit = nr_free_buffer_pages() / 8;
 	limit = max(limit, 128UL);
 	net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
@@ -3298,7 +3297,8 @@ void __init tcp_init(void)
 	sysctl_max_syn_backlog = max(128, cnt / 256);
 
 	tcp_init_mem(&init_net);
-	limit = nr_free_buffer_pages() / 8;
+	/* Set per-socket limits to no more than 1/128 the pressure threshold */
+	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10);
 	limit = max(limit, 128UL);
 	max_share = min(4UL*1024*1024, limit);
 
-- 
cgit v1.1


From b01377a4200d0dfc7b04a8daabb4739727353703 Mon Sep 17 00:00:00 2001
From: "sjur.brandeland@stericsson.com" <sjur.brandeland@stericsson.com>
Date: Thu, 2 Feb 2012 01:21:02 +0000
Subject: caif: Bugfix list_del_rcu race in cfmuxl_ctrlcmd.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Always use cfmuxl_remove_uplayer when removing a up-layer.
cfmuxl_ctrlcmd() can be called independently and in parallel with
cfmuxl_remove_uplayer(). The race between them could cause list_del_rcu
to be called on a node which has been already taken out from the list.
That lead to a (rare) crash on accessing poisoned node->prev inside
list_del_rcu.

This fix ensures that deletion are done holding the same lock.

Reported-by: Dmitry Tarnyagin <dmitry.tarnyagin@stericsson.com>
Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfmuxl.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index b36f24a..94b0861 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -248,7 +248,6 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 {
 	struct cfmuxl *muxl = container_obj(layr);
 	struct cflayer *layer;
-	int idx;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(layer, &muxl->srvl_list, node) {
@@ -257,14 +256,9 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 
 			if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND ||
 				ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) &&
-					layer->id != 0) {
-
-				idx = layer->id % UP_CACHE_SIZE;
-				spin_lock_bh(&muxl->receive_lock);
-				RCU_INIT_POINTER(muxl->up_cache[idx], NULL);
-				list_del_rcu(&layer->node);
-				spin_unlock_bh(&muxl->receive_lock);
-			}
+					layer->id != 0)
+				cfmuxl_remove_uplayer(layr, layer->id);
+
 			/* NOTE: ctrlcmd is not allowed to block */
 			layer->ctrlcmd(layer, ctrl, phyid);
 		}
-- 
cgit v1.1


From ba7605745d5c99f0e71b3ec6c7cb5ed6afe540ad Mon Sep 17 00:00:00 2001
From: Dmitry Tarnyagin <dmitry.tarnyagin@stericsson.com>
Date: Thu, 2 Feb 2012 01:21:03 +0000
Subject: caif: Bugfix double kfree_skb upon xmit failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SKB is freed twice upon send error. The Network stack consumes SKB even
when it returns error code.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index a986280..a97d97a 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -539,8 +539,10 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
 	pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb);
 	memset(skb->cb, 0, sizeof(struct caif_payload_info));
 
-	if (cf_sk->layer.dn == NULL)
+	if (cf_sk->layer.dn == NULL) {
+		kfree_skb(skb);
 		return -EINVAL;
+	}
 
 	return cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
 }
@@ -683,10 +685,10 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		}
 		err = transmit_skb(skb, cf_sk,
 				msg->msg_flags&MSG_DONTWAIT, timeo);
-		if (err < 0) {
-			kfree_skb(skb);
+		if (err < 0)
+			/* skb is already freed */
 			goto pipe_err;
-		}
+
 		sent += size;
 	}
 
-- 
cgit v1.1


From 5962b35c1de3254a2f03b95efd3b7854b874d7b7 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 3 Feb 2012 05:18:43 +0000
Subject: netprio_cgroup: Fix obo in get_prioidx

It was recently pointed out to me that the get_prioidx function sets a bit in
the prioidx map prior to checking to see if the index being set is out of
bounds.  This patch corrects that, avoiding the possiblity of us writing beyond
the end of the array

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Stanislaw Gruszka <sgruszka@redhat.com>
CC: Stanislaw Gruszka <sgruszka@redhat.com>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netprio_cgroup.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 3a9fd48..9ae183a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -58,11 +58,12 @@ static int get_prioidx(u32 *prio)
 
 	spin_lock_irqsave(&prioidx_map_lock, flags);
 	prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
+	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
+		spin_unlock_irqrestore(&prioidx_map_lock, flags);
+		return -ENOSPC;
+	}
 	set_bit(prioidx, prioidx_map);
 	spin_unlock_irqrestore(&prioidx_map_lock, flags);
-	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ)
-		return -ENOSPC;
-
 	atomic_set(&max_prioidx, prioidx);
 	*prio = prioidx;
 	return 0;
-- 
cgit v1.1