From 004fa5ed08cc5d3188db42c05d6b80feaae004c2 Mon Sep 17 00:00:00 2001
From: Jukka Rissanen <jukka.rissanen@linux.intel.com>
Date: Wed, 10 Dec 2014 14:19:53 +0200
Subject: Bluetooth: 6lowpan: Do not free skb when packet is dropped

If we need to drop the message because of some error in the
compression etc, then do not free the skb as that is done
automatically in other part of networking stack.

Signed-off-by: Jukka Rissanen <jukka.rissanen@linux.intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/6lowpan.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 76617be..c989253 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -390,7 +390,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
 
 drop:
 	dev->stats.rx_dropped++;
-	kfree_skb(skb);
 	return NET_RX_DROP;
 }
 
-- 
cgit v1.1


From 51bda2bca53b265715ca1852528f38dc67429d9a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 19 Dec 2014 06:20:57 +0000
Subject: Bluetooth: hidp_connection_add() unsafe use of l2cap_pi()

it's OK after we'd verified the sockets, but not before that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hidp/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index cc25d0b..07348e1 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1314,13 +1314,14 @@ int hidp_connection_add(struct hidp_connadd_req *req,
 {
 	struct hidp_session *session;
 	struct l2cap_conn *conn;
-	struct l2cap_chan *chan = l2cap_pi(ctrl_sock->sk)->chan;
+	struct l2cap_chan *chan;
 	int ret;
 
 	ret = hidp_verify_sockets(ctrl_sock, intr_sock);
 	if (ret)
 		return ret;
 
+	chan = l2cap_pi(ctrl_sock->sk)->chan;
 	conn = NULL;
 	l2cap_chan_lock(chan);
 	if (chan->conn)
-- 
cgit v1.1


From 96c26653ce65bf84f3212f8b00d4316c1efcbf4c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 19 Dec 2014 06:20:58 +0000
Subject: Bluetooth: cmtp: cmtp_add_connection() should verify that it's
 dealing with l2cap socket

... rather than relying on ciptool(8) never passing it anything else.  Give
it e.g. an AF_UNIX connected socket (from socketpair(2)) and it'll oops,
trying to evaluate &l2cap_pi(sock->sk)->chan->dst...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/cmtp/core.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 67fe5e8..278a194 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -334,6 +334,9 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 
 	BT_DBG("");
 
+	if (!l2cap_is_socket(sock))
+		return -EBADFD;
+
 	session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL);
 	if (!session)
 		return -ENOMEM;
-- 
cgit v1.1


From 71bb99a02b32b4cc4265118e85f6035ca72923f0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 19 Dec 2014 06:20:59 +0000
Subject: Bluetooth: bnep: bnep_add_connection() should verify that it's
 dealing with l2cap socket

same story as cmtp

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/bnep/core.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 85bcc21..ce82722d 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -533,6 +533,9 @@ int bnep_add_connection(struct bnep_connadd_req *req, struct socket *sock)
 
 	BT_DBG("");
 
+	if (!l2cap_is_socket(sock))
+		return -EBADFD;
+
 	baswap((void *) dst, &l2cap_pi(sock->sk)->chan->dst);
 	baswap((void *) src, &l2cap_pi(sock->sk)->chan->src);
 
-- 
cgit v1.1


From da413eec729dae5dcb150e2eb34c5e7e5e4e1b49 Mon Sep 17 00:00:00 2001
From: Dan Collins <dan@dcollins.co.nz>
Date: Fri, 19 Dec 2014 16:49:25 +1300
Subject: packet: Fixed TPACKET V3 to signal poll when block is closed rather
 than every packet

Make TPACKET_V3 signal poll when block is closed rather than for every
packet. Side effect is that poll will be signaled when block retire
timer expires which didn't previously happen. Issue was visible when
sending packets at a very low frequency such that all blocks are retired
before packets are received by TPACKET_V3. This caused avoidable packet
loss. The fix ensures that the signal is sent when blocks are closed
which covers the normal path where the block is filled as well as the
path where the timer expires. The case where a block is filled without
moving to the next block (ie. all blocks are full) will still cause poll
to be signaled.

Signed-off-by: Dan Collins <dan@dcollins.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e52a447..6880f34a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -785,6 +785,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
 
 	struct tpacket3_hdr *last_pkt;
 	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
+	struct sock *sk = &po->sk;
 
 	if (po->stats.stats3.tp_drops)
 		status |= TP_STATUS_LOSING;
@@ -809,6 +810,8 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
 	/* Flush the block */
 	prb_flush_block(pkc1, pbd1, status);
 
+	sk->sk_data_ready(sk);
+
 	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
 }
 
@@ -2052,12 +2055,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	smp_wmb();
 #endif
 
-	if (po->tp_version <= TPACKET_V2)
+	if (po->tp_version <= TPACKET_V2) {
 		__packet_set_status(po, h.raw, status);
-	else
+		sk->sk_data_ready(sk);
+	} else {
 		prb_clear_blk_fill_status(&po->rx_ring);
-
-	sk->sk_data_ready(sk);
+	}
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
-- 
cgit v1.1


From 2dc49d1680b534877fd20cce52557ea542bb06b6 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 22 Dec 2014 18:22:48 +0100
Subject: tcp6: don't move IP6CB before xfrm6_policy_check()

When xfrm6_policy_check() is used, _decode_session6() is called after some
intermediate functions. This function uses IP6CB(), thus TCP_SKB_CB() must be
prepared after the call of xfrm6_policy_check().

Before this patch, scenarii with IPv6 + TCP + IPsec Transport are broken.

Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
Reported-by: Huaibin Wang <huaibin.wang@6wind.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5ff8780..9c0b54e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1387,6 +1387,28 @@ ipv6_pktoptions:
 	return 0;
 }
 
+static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
+			   const struct tcphdr *th)
+{
+	/* This is tricky: we move IP6CB at its correct location into
+	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
+	 * _decode_session6() uses IP6CB().
+	 * barrier() makes sure compiler won't play aliasing games.
+	 */
+	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
+		sizeof(struct inet6_skb_parm));
+	barrier();
+
+	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+				    skb->len - th->doff*4);
+	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
+	TCP_SKB_CB(skb)->sacked = 0;
+}
+
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
 	const struct tcphdr *th;
@@ -1418,24 +1440,9 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 
 	th = tcp_hdr(skb);
 	hdr = ipv6_hdr(skb);
-	/* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
-	 * barrier() makes sure compiler wont play fool^Waliasing games.
-	 */
-	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
-		sizeof(struct inet6_skb_parm));
-	barrier();
-
-	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
-	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
-				    skb->len - th->doff*4);
-	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
-	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
-	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
-	TCP_SKB_CB(skb)->sacked = 0;
 
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
-				tcp_v6_iif(skb));
+				inet6_iif(skb));
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1451,6 +1458,8 @@ process:
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
+	tcp_v6_fill_cb(skb, hdr, th);
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (tcp_v6_inbound_md5_hash(sk, skb))
 		goto discard_and_relse;
@@ -1482,6 +1491,8 @@ no_tcp_socket:
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 		goto discard_it;
 
+	tcp_v6_fill_cb(skb, hdr, th);
+
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 csum_error:
 		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
@@ -1505,6 +1516,8 @@ do_time_wait:
 		goto discard_it;
 	}
 
+	tcp_v6_fill_cb(skb, hdr, th);
+
 	if (skb->len < (th->doff<<2)) {
 		inet_twsk_put(inet_twsk(sk));
 		goto bad_packet;
-- 
cgit v1.1


From af6dabc9c70ae3f307685b1f32f52d60b1bf0527 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 19 Dec 2014 11:09:13 +0800
Subject: net: drop the packet when fails to do software segmentation or header
 check

Commit cecda693a969816bac5e470e1d9c9c0ef5567bca ("net: keep original skb
which only needs header checking during software GSO") keeps the original
skb for packets that only needs header check, but it doesn't drop the
packet if software segmentation or header check were failed.

Fixes cecda693a9 ("net: keep original skb which only needs header checking during software GSO")
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index f411c28..a989f85 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2673,7 +2673,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 
 		segs = skb_gso_segment(skb, features);
 		if (IS_ERR(segs)) {
-			segs = NULL;
+			goto out_kfree_skb;
 		} else if (segs) {
 			consume_skb(skb);
 			skb = segs;
-- 
cgit v1.1


From 5b6698b0e4a37053de35cc24ee695b98a7eb712b Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 20 Dec 2014 13:48:55 +0100
Subject: batman-adv: Calculate extra tail size based on queued fragments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605
("batman-adv: Receive fragmented packets and merge"). The new code provided a
mostly unused parameter skb for the merging function. It is used inside the
function to calculate the additionally needed skb tailroom. But instead of
increasing its own tailroom, it is only increasing the tailroom of the first
queued skb. This is not correct in some situations because the first queued
entry can be a different one than the parameter.

An observed problem was:

1. packet with size 104, total_size 1464, fragno 1 was received
   - packet is queued
2. packet with size 1400, total_size 1464, fragno 0 was received
   - packet is queued at the end of the list
3. enough data was received and can be given to the merge function
   (1464 == (1400 - 20) + (104 - 20))
   - merge functions gets 1400 byte large packet as skb argument
4. merge function gets first entry in queue (104 byte)
   - stored as skb_out
5. merge function calculates the required extra tail as total_size - skb->len
   - pskb_expand_head tail of skb_out with 64 bytes
6. merge function tries to squeeze the extra 1380 bytes from the second queued
   skb (1400 byte aka skb parameter) in the 64 extra tail bytes of skb_out

Instead calculate the extra required tail bytes for skb_out also using skb_out
instead of using the parameter skb. The skb parameter is only used to get the
total_size from the last received packet. This is also the total_size used to
decide that all fragments were received.

Reported-by: Philipp Psurek <philipp.psurek@gmail.com>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Acked-by: Martin Hundebøll <martin@hundeboll.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/batman-adv/fragmentation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index fc1835c..8af3461 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -251,7 +251,7 @@ batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
 	kfree(entry);
 
 	/* Make room for the rest of the fragments. */
-	if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) {
+	if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
 		kfree_skb(skb_out);
 		skb_out = NULL;
 		goto free;
-- 
cgit v1.1


From 0402e444cd199389b7fe47be68a67b817e09e097 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 20 Dec 2014 13:48:56 +0100
Subject: batman-adv: Unify fragment size calculation

The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605
("batman-adv: Receive fragmented packets and merge") by an implementation which
can handle up to 16 fragments of a packet. The packet is prepared for the split
in fragments by the function batadv_frag_send_packet and the actual split is
done by batadv_frag_create.

Both functions calculate the size of a fragment themself. But their calculation
differs because batadv_frag_send_packet also subtracts ETH_HLEN. Therefore,
the check in batadv_frag_send_packet "can a full fragment can be created?" may
return true even when batadv_frag_create cannot create a full fragment.

The function batadv_frag_create doesn't check the size of the skb before
splitting it and therefore might try to create a larger fragment than the
remaining buffer. This creates an integer underflow and an invalid len is given
to skb_split.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/batman-adv/fragmentation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 8af3461..00f9e14 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -434,7 +434,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
 	 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
 	 */
 	mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
-	max_fragment_size = (mtu - header_size - ETH_HLEN);
+	max_fragment_size = mtu - header_size;
 	max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
 
 	/* Don't even try to fragment, if we need more than 16 fragments */
-- 
cgit v1.1


From 0d1644919578db525b9a7b6c8197ce02adbfce26 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@meshcoding.com>
Date: Sat, 20 Dec 2014 13:48:57 +0100
Subject: batman-adv: avoid NULL dereferences and fix if check

Gateway having bandwidth_down equal to zero are not accepted
at all and so never added to the Gateway list.
For this reason checking the bandwidth_down member in
batadv_gw_out_of_range() is useless.

This is probably a copy/paste error and this check was supposed
to be "!gw_node" only. Moreover, the way the check is written
now may also lead to a NULL dereference.

Fix this by rewriting the if-condition properly.

Introduced by 414254e342a0d58144de40c3da777521ebaeeb07
("batman-adv: tvlv - gateway download/upload bandwidth container")

Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/batman-adv/gateway_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 90cff58..e0bcf9e 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -810,7 +810,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
 		goto out;
 
 	gw_node = batadv_gw_node_get(bat_priv, orig_dst_node);
-	if (!gw_node->bandwidth_down == 0)
+	if (!gw_node)
 		goto out;
 
 	switch (atomic_read(&bat_priv->gw_mode)) {
-- 
cgit v1.1


From 726ce70e9e4050409243f3a1d735dc86bc6e6e57 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:21 +1100
Subject: net: Move napi polling code out of net_rx_action

This patch creates a new function napi_poll and moves the napi
polling code from net_rx_action into it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 98 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 54 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a989f85..493ae8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4557,6 +4557,59 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+	void *have;
+	int work, weight;
+
+	list_del_init(&n->poll_list);
+
+	have = netpoll_poll_lock(n);
+
+	weight = n->weight;
+
+	/* This NAPI_STATE_SCHED test is for avoiding a race
+	 * with netpoll's poll_napi().  Only the entity which
+	 * obtains the lock and sees NAPI_STATE_SCHED set will
+	 * actually make the ->poll() call.  Therefore we avoid
+	 * accidentally calling ->poll() when NAPI is not scheduled.
+	 */
+	work = 0;
+	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+		work = n->poll(n, weight);
+		trace_napi_poll(n);
+	}
+
+	WARN_ON_ONCE(work > weight);
+
+	if (likely(work < weight))
+		goto out_unlock;
+
+	/* Drivers must not modify the NAPI state if they
+	 * consume the entire weight.  In such cases this code
+	 * still "owns" the NAPI instance and therefore can
+	 * move the instance around on the list at-will.
+	 */
+	if (unlikely(napi_disable_pending(n))) {
+		napi_complete(n);
+		goto out_unlock;
+	}
+
+	if (n->gro_list) {
+		/* flush too old packets
+		 * If HZ < 1000, flush all packets.
+		 */
+		napi_gro_flush(n, HZ >= 1000);
+	}
+
+	list_add_tail(&n->poll_list, repoll);
+
+out_unlock:
+	netpoll_poll_unlock(have);
+
+	return work;
+}
+
 static void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -4564,7 +4617,6 @@ static void net_rx_action(struct softirq_action *h)
 	int budget = netdev_budget;
 	LIST_HEAD(list);
 	LIST_HEAD(repoll);
-	void *have;
 
 	local_irq_disable();
 	list_splice_init(&sd->poll_list, &list);
@@ -4572,7 +4624,6 @@ static void net_rx_action(struct softirq_action *h)
 
 	while (!list_empty(&list)) {
 		struct napi_struct *n;
-		int work, weight;
 
 		/* If softirq window is exhausted then punt.
 		 * Allow this to run for 2 jiffies since which will allow
@@ -4583,48 +4634,7 @@ static void net_rx_action(struct softirq_action *h)
 
 
 		n = list_first_entry(&list, struct napi_struct, poll_list);
-		list_del_init(&n->poll_list);
-
-		have = netpoll_poll_lock(n);
-
-		weight = n->weight;
-
-		/* This NAPI_STATE_SCHED test is for avoiding a race
-		 * with netpoll's poll_napi().  Only the entity which
-		 * obtains the lock and sees NAPI_STATE_SCHED set will
-		 * actually make the ->poll() call.  Therefore we avoid
-		 * accidentally calling ->poll() when NAPI is not scheduled.
-		 */
-		work = 0;
-		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
-			work = n->poll(n, weight);
-			trace_napi_poll(n);
-		}
-
-		WARN_ON_ONCE(work > weight);
-
-		budget -= work;
-
-		/* Drivers must not modify the NAPI state if they
-		 * consume the entire weight.  In such cases this code
-		 * still "owns" the NAPI instance and therefore can
-		 * move the instance around on the list at-will.
-		 */
-		if (unlikely(work == weight)) {
-			if (unlikely(napi_disable_pending(n))) {
-				napi_complete(n);
-			} else {
-				if (n->gro_list) {
-					/* flush too old packets
-					 * If HZ < 1000, flush all packets.
-					 */
-					napi_gro_flush(n, HZ >= 1000);
-				}
-				list_add_tail(&n->poll_list, &repoll);
-			}
-		}
-
-		netpoll_poll_unlock(have);
+		budget -= napi_poll(n, &repoll);
 	}
 
 	if (!sd_has_rps_ipi_waiting(sd) &&
-- 
cgit v1.1


From 001ce546bb537bb5b7821f05633556a0c9787e32 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:22 +1100
Subject: net: Detect drivers that reschedule NAPI and exhaust budget

The commit d75b1ade567ffab085e8adbbdacf0092d10cd09c (net: less
interrupt masking in NAPI) required drivers to leave poll_list
empty if the entire budget is consumed.

We have already had two broken drivers so let's add a check for
this.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 493ae8e..c0cf129 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4602,6 +4602,15 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		napi_gro_flush(n, HZ >= 1000);
 	}
 
+	/* Some drivers may have called napi_schedule
+	 * prior to exhausting their budget.
+	 */
+	if (unlikely(!list_empty(&n->poll_list))) {
+		pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
+			     n->dev ? n->dev->name : "backlog");
+		goto out_unlock;
+	}
+
 	list_add_tail(&n->poll_list, repoll);
 
 out_unlock:
-- 
cgit v1.1


From 6bd373ebbac4b13ecd39ddc37a0dc5ad4c5e4585 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:24 +1100
Subject: net: Always poll at least one device in net_rx_action

We should only perform the softnet_break check after we have polled
at least one device in net_rx_action.  Otherwise a zero or negative
setting of netdev_budget can lock up the whole system.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index c0cf129..b85eba9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4634,16 +4634,15 @@ static void net_rx_action(struct softirq_action *h)
 	while (!list_empty(&list)) {
 		struct napi_struct *n;
 
+		n = list_first_entry(&list, struct napi_struct, poll_list);
+		budget -= napi_poll(n, &repoll);
+
 		/* If softirq window is exhausted then punt.
 		 * Allow this to run for 2 jiffies since which will allow
 		 * an average latency of 1.5/HZ.
 		 */
 		if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
 			goto softnet_break;
-
-
-		n = list_first_entry(&list, struct napi_struct, poll_list);
-		budget -= napi_poll(n, &repoll);
 	}
 
 	if (!sd_has_rps_ipi_waiting(sd) &&
-- 
cgit v1.1


From ceb8d5bf17d366534f32d2f60f41d905a5bc864b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Dec 2014 07:16:25 +1100
Subject: net: Rearrange loop in net_rx_action

This patch rearranges the loop in net_rx_action to reduce the
amount of jumping back and forth when reading the code.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index b85eba9..c97ae6f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4631,9 +4631,15 @@ static void net_rx_action(struct softirq_action *h)
 	list_splice_init(&sd->poll_list, &list);
 	local_irq_enable();
 
-	while (!list_empty(&list)) {
+	for (;;) {
 		struct napi_struct *n;
 
+		if (list_empty(&list)) {
+			if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
+				return;
+			break;
+		}
+
 		n = list_first_entry(&list, struct napi_struct, poll_list);
 		budget -= napi_poll(n, &repoll);
 
@@ -4641,15 +4647,13 @@ static void net_rx_action(struct softirq_action *h)
 		 * Allow this to run for 2 jiffies since which will allow
 		 * an average latency of 1.5/HZ.
 		 */
-		if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
-			goto softnet_break;
+		if (unlikely(budget <= 0 ||
+			     time_after_eq(jiffies, time_limit))) {
+			sd->time_squeeze++;
+			break;
+		}
 	}
 
-	if (!sd_has_rps_ipi_waiting(sd) &&
-	    list_empty(&list) &&
-	    list_empty(&repoll))
-		return;
-out:
 	local_irq_disable();
 
 	list_splice_tail_init(&sd->poll_list, &list);
@@ -4659,12 +4663,6 @@ out:
 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 
 	net_rps_action_and_irq_enable(sd);
-
-	return;
-
-softnet_break:
-	sd->time_squeeze++;
-	goto out;
 }
 
 struct netdev_adjacent {
-- 
cgit v1.1


From d0edc7bf397a5e0f312bf8a1e87cfee0019dc07b Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 23 Dec 2014 16:20:11 -0800
Subject: mpls: Fix config check for mpls.

Fixes MPLS GSO for case when mpls is compiled as kernel module.

Fixes: 0d89d2035f ("MPLS: Add limited GSO support").
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index c97ae6f..67b6210 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2522,7 +2522,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 /* If MPLS offload request, verify we are testing hardware MPLS features
  * instead of standard features for the netdev.
  */
-#ifdef CONFIG_NET_MPLS_GSO
+#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
 static netdev_features_t net_mpls_features(struct sk_buff *skb,
 					   netdev_features_t features,
 					   __be16 type)
-- 
cgit v1.1


From 4cc1beca3096a6425543cb83a61665b9c8040f98 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 23 Dec 2014 16:20:16 -0800
Subject: mpls: Fix allowed protocols for mpls gso

MPLS and Tunnel GSO does not work together.  Reject packet which
request such GSO.

Fixes: 0d89d2035f ("MPLS: Add limited GSO support").
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/mpls_gso.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index ca27837..349295d 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -31,10 +31,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_TCPV6 |
 				  SKB_GSO_UDP |
 				  SKB_GSO_DODGY |
-				  SKB_GSO_TCP_ECN |
-				  SKB_GSO_GRE |
-				  SKB_GSO_GRE_CSUM |
-				  SKB_GSO_IPIP)))
+				  SKB_GSO_TCP_ECN)))
 		goto out;
 
 	/* Setup inner SKB. */
-- 
cgit v1.1


From ec449f40bb3e19c77f62ddabf7c1fe3ccefece6f Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 23 Dec 2014 16:20:20 -0800
Subject: openvswitch: Fix MPLS action validation.

Linux stack does not implement GSO for packet with multiple
encapsulations.  Therefore there was check in MPLS action
validation to detect such case, But this check introduced
bug which deleted one or more actions from actions list.
Following patch removes this check to fix the validation.

Fixes: 25cd9ba0abc ("openvswitch: Add basic MPLS support to
kernel").

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Reported-by: Srinivas Neginhal <sneginha@vmware.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_netlink.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 9645a21..d1eecf7 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1753,7 +1753,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 				  __be16 eth_type, __be16 vlan_tci, bool log)
 {
 	const struct nlattr *a;
-	bool out_tnl_port = false;
 	int rem, err;
 
 	if (depth >= SAMPLE_ACTION_DEPTH)
@@ -1796,8 +1795,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 		case OVS_ACTION_ATTR_OUTPUT:
 			if (nla_get_u32(a) >= DP_MAX_PORTS)
 				return -EINVAL;
-			out_tnl_port = false;
-
 			break;
 
 		case OVS_ACTION_ATTR_HASH: {
@@ -1832,12 +1829,6 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 		case OVS_ACTION_ATTR_PUSH_MPLS: {
 			const struct ovs_action_push_mpls *mpls = nla_data(a);
 
-			/* Networking stack do not allow simultaneous Tunnel
-			 * and MPLS GSO.
-			 */
-			if (out_tnl_port)
-				return -EINVAL;
-
 			if (!eth_p_mpls(mpls->mpls_ethertype))
 				return -EINVAL;
 			/* Prohibit push MPLS other than to a white list
@@ -1873,11 +1864,9 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 
 		case OVS_ACTION_ATTR_SET:
 			err = validate_set(a, key, sfa,
-					   &out_tnl_port, eth_type, log);
+					   &skip_copy, eth_type, log);
 			if (err)
 				return err;
-
-			skip_copy = out_tnl_port;
 			break;
 
 		case OVS_ACTION_ATTR_SAMPLE:
-- 
cgit v1.1


From cbe7e76d94f59e89302bd514e4b685e03d1ebbe4 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 23 Dec 2014 16:20:28 -0800
Subject: openvswitch: Fix GSO with multiple MPLS label.

MPLS GSO needs to know inner most protocol to process GSO packets.

Fixes: 25cd9ba0abc ("openvswitch: Add basic MPLS support to
kernel").

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/actions.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 764fdc3..770064c 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -147,7 +147,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 	hdr = eth_hdr(skb);
 	hdr->h_proto = mpls->mpls_ethertype;
 
-	skb_set_inner_protocol(skb, skb->protocol);
+	if (!skb->inner_protocol)
+		skb_set_inner_protocol(skb, skb->protocol);
 	skb->protocol = mpls->mpls_ethertype;
 
 	invalidate_flow_key(key);
-- 
cgit v1.1


From 997e068ebc17d8d57e735578df44b6341cd5f2f3 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Tue, 23 Dec 2014 16:20:32 -0800
Subject: openvswitch: Fix vport_send double free

Today vport-send has complex error handling because it involves
freeing skb and updating stats depending on return value from
vport send implementation.
This can be simplified by delegating responsibility of freeing
skb to the vport implementation for all cases. So that
vport-send needs just update stats.

Fixes: 91b7514cdf ("openvswitch: Unify vport error stats
handling")
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/geneve.c              |  6 +++++-
 net/openvswitch/vport-geneve.c |  3 +++
 net/openvswitch/vport-gre.c    | 18 +++++++++++-------
 net/openvswitch/vport-vxlan.c  |  2 ++
 net/openvswitch/vport.c        |  5 ++---
 5 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 95e47c9..394a200 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -122,14 +122,18 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
 	int err;
 
 	skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
 
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
 			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
 
 	err = skb_cow_head(skb, min_headroom);
-	if (unlikely(err))
+	if (unlikely(err)) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	skb = vlan_hwaccel_push_inside(skb);
 	if (unlikely(!skb))
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 347fa23..484864d 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -219,7 +219,10 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 			      false);
 	if (err < 0)
 		ip_rt_put(rt);
+	return err;
+
 error:
+	kfree_skb(skb);
 	return err;
 }
 
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 6b69df5..28f54e9 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -73,7 +73,7 @@ static struct sk_buff *__build_header(struct sk_buff *skb,
 
 	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
 	if (IS_ERR(skb))
-		return NULL;
+		return skb;
 
 	tpi.flags = filter_tnl_flags(tun_key->tun_flags);
 	tpi.proto = htons(ETH_P_TEB);
@@ -144,7 +144,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 
 	if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
 		err = -EINVAL;
-		goto error;
+		goto err_free_skb;
 	}
 
 	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
@@ -157,8 +157,10 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 	fl.flowi4_proto = IPPROTO_GRE;
 
 	rt = ip_route_output_key(net, &fl);
-	if (IS_ERR(rt))
-		return PTR_ERR(rt);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto err_free_skb;
+	}
 
 	tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
 
@@ -183,8 +185,9 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 
 	/* Push Tunnel header. */
 	skb = __build_header(skb, tunnel_hlen);
-	if (unlikely(!skb)) {
-		err = 0;
+	if (IS_ERR(skb)) {
+		err = PTR_ERR(rt);
+		skb = NULL;
 		goto err_free_rt;
 	}
 
@@ -198,7 +201,8 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 			     tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
 err_free_rt:
 	ip_rt_put(rt);
-error:
+err_free_skb:
+	kfree_skb(skb);
 	return err;
 }
 
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 38f95a5..d7c46b3 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -187,7 +187,9 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 			     false);
 	if (err < 0)
 		ip_rt_put(rt);
+	return err;
 error:
+	kfree_skb(skb);
 	return err;
 }
 
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 9584526..53f3ebb 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -519,10 +519,9 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
 		u64_stats_update_end(&stats->syncp);
 	} else if (sent < 0) {
 		ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
-		kfree_skb(skb);
-	} else
+	} else {
 		ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
-
+	}
 	return sent;
 }
 
-- 
cgit v1.1


From 796f2da81bead71ffc91ef70912cd8d1827bf756 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Mon, 22 Dec 2014 19:04:14 +0900
Subject: net: Fix stacked vlan offload features computation

When vlan tags are stacked, it is very likely that the outer tag is stored
in skb->vlan_tci and skb->protocol shows the inner tag's vlan_proto.
Currently netif_skb_features() first looks at skb->protocol even if there
is the outer tag in vlan_tci, thus it incorrectly retrieves the protocol
encapsulated by the inner vlan instead of the inner vlan protocol.
This allows GSO packets to be passed to HW and they end up being
corrupted.

Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 67b6210..bd44e28 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2570,11 +2570,14 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
-	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
-		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		protocol = veh->h_vlan_encapsulated_proto;
-	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, features);
+	if (!vlan_tx_tag_present(skb)) {
+		if (unlikely(protocol == htons(ETH_P_8021Q) ||
+			     protocol == htons(ETH_P_8021AD))) {
+			struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+			protocol = veh->h_vlan_encapsulated_proto;
+		} else {
+			return harmonize_features(skb, features);
+		}
 	}
 
 	features = netdev_intersect_features(features,
-- 
cgit v1.1


From b8fb4e0648a2ab3734140342002f68fb0c7d1602 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 23 Dec 2014 01:13:18 +0100
Subject: net: Reset secmark when scrubbing packet

skb_scrub_packet() is called when a packet switches between a context
such as between underlay and overlay, between namespaces, or between
L3 subnets.

While we already scrub the packet mark, connection tracking entry,
and cached destination, the security mark/context is left intact.

It seems wrong to inherit the security context of a packet when going
from overlay to underlay or across forwarding paths.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ae13ef6..395c15b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4148,6 +4148,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
+	skb_init_secmark(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
 	nf_reset_trace(skb);
-- 
cgit v1.1


From 8bfe8442ff20fdc2d965c197103d935a99bd3296 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 23 Dec 2014 23:10:48 +0100
Subject: Bluetooth: Fix controller configuration with HCI_QUIRK_INVALID_BDADDR

When controllers set the HCI_QUIRK_INVALID_BDADDR flag, it is required
by userspace to program a valid public Bluetooth device address into
the controller before it can be used.

After successful address configuration, the internal state changes and
the controller runs the complete initialization procedure. However one
small difference is that this is no longer the HCI_SETUP stage. The
HCI_SETUP stage is only valid during initial controller setup. In this
case the stack runs the initialization as part of the HCI_CONFIG stage.

The controller version information, default name and supported commands
are only stored during HCI_SETUP. While these information are static,
they are not read initially when HCI_QUIRK_INVALID_BDADDR is set. So
when running in HCI_CONFIG state, these information need to be updated
as well.

This especially impacts Bluetooth 4.1 and later controllers using
extended feature pages and second event mask page.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Cc: stable@vger.kernel.org # 3.17+
---
 net/bluetooth/hci_event.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 39a5c8a..c03d4b0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -242,7 +242,8 @@ static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
 	if (rp->status)
 		return;
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags))
+	if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
+	    test_bit(HCI_CONFIG, &hdev->dev_flags))
 		memcpy(hdev->dev_name, rp->name, HCI_MAX_NAME_LENGTH);
 }
 
@@ -509,7 +510,8 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 	if (rp->status)
 		return;
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags)) {
+	if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
+	    test_bit(HCI_CONFIG, &hdev->dev_flags)) {
 		hdev->hci_ver = rp->hci_ver;
 		hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
 		hdev->lmp_ver = rp->lmp_ver;
@@ -528,7 +530,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,
 	if (rp->status)
 		return;
 
-	if (test_bit(HCI_SETUP, &hdev->dev_flags))
+	if (test_bit(HCI_SETUP, &hdev->dev_flags) ||
+	    test_bit(HCI_CONFIG, &hdev->dev_flags))
 		memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
 }
 
-- 
cgit v1.1


From 6a8fc95c87110a466ee81675b41170b963f82bdb Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Wed, 24 Dec 2014 20:43:11 +0200
Subject: Bluetooth: Fix accepting connections when not using mgmt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When connectable mode is enabled (page scan on) through some non-mgmt
method the HCI_CONNECTABLE flag will not be set. For backwards
compatibility with user space versions not using mgmt we should not
require HCI_CONNECTABLE to be set if HCI_MGMT is not set.

Reported-by: Pali Rohár <pali.rohar@gmail.com>
Tested-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Cc: stable@vger.kernel.org # 3.17+
---
 net/bluetooth/hci_event.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index c03d4b0..3f2e8b8 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2197,7 +2197,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 	}
 
-	if (!test_bit(HCI_CONNECTABLE, &hdev->dev_flags) &&
+	/* Require HCI_CONNECTABLE or a whitelist entry to accept the
+	 * connection. These features are only touched through mgmt so
+	 * only do the checks if HCI_MGMT is set.
+	 */
+	if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
+	    !test_bit(HCI_CONNECTABLE, &hdev->dev_flags) &&
 	    !hci_bdaddr_list_lookup(&hdev->whitelist, &ev->bdaddr,
 				    BDADDR_BREDR)) {
 		    hci_reject_conn(hdev, &ev->bdaddr);
-- 
cgit v1.1


From 4aa6118811c09c75f508f3f070328c71292f1ce4 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 24 Dec 2014 14:41:39 +0800
Subject: openvswitch: fix odd_ptr_err.cocci warnings

net/openvswitch/vport-gre.c:188:5-11: inconsistent IS_ERR and PTR_ERR, PTR_ERR on line 189

 PTR_ERR should access the value just tested by IS_ERR

Semantic patch information:
 There can be false positives in the patch case, where it is the call
 IS_ERR that is wrong.

Generated by: scripts/coccinelle/tests/odd_ptr_err.cocci

CC: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/vport-gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 28f54e9..d4168c4 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -186,7 +186,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 	/* Push Tunnel header. */
 	skb = __build_header(skb, tunnel_hlen);
 	if (IS_ERR(skb)) {
-		err = PTR_ERR(rt);
+		err = PTR_ERR(skb);
 		skb = NULL;
 		goto err_free_rt;
 	}
-- 
cgit v1.1


From 2c26d34bbcc0b3f30385d5587aa232289e2eed8e Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Fri, 19 Dec 2014 15:32:00 -0800
Subject: net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding

When using VXLAN tunnels and a sky2 device, I have experienced
checksum failures of the following type:

[ 4297.761899] eth0: hw csum failure
[...]
[ 4297.765223] Call Trace:
[ 4297.765224]  <IRQ>  [<ffffffff8172f026>] dump_stack+0x46/0x58
[ 4297.765235]  [<ffffffff8162ba52>] netdev_rx_csum_fault+0x42/0x50
[ 4297.765238]  [<ffffffff8161c1a0>] ? skb_push+0x40/0x40
[ 4297.765240]  [<ffffffff8162325c>] __skb_checksum_complete+0xbc/0xd0
[ 4297.765243]  [<ffffffff8168c602>] tcp_v4_rcv+0x2e2/0x950
[ 4297.765246]  [<ffffffff81666ca0>] ? ip_rcv_finish+0x360/0x360

	These are reliably reproduced in a network topology of:

container:eth0 == host(OVS VXLAN on VLAN) == bond0 == eth0 (sky2) -> switch

	When VXLAN encapsulated traffic is received from a similarly
configured peer, the above warning is generated in the receive
processing of the encapsulated packet.  Note that the warning is
associated with the container eth0.

        The skbs from sky2 have ip_summed set to CHECKSUM_COMPLETE, and
because the packet is an encapsulated Ethernet frame, the checksum
generated by the hardware includes the inner protocol and Ethernet
headers.

	The receive code is careful to update the skb->csum, except in
__dev_forward_skb, as called by dev_forward_skb.  __dev_forward_skb
calls eth_type_trans, which in turn calls skb_pull_inline(skb, ETH_HLEN)
to skip over the Ethernet header, but does not update skb->csum when
doing so.

	This patch resolves the problem by adding a call to
skb_postpull_rcsum to update the skb->csum after the call to
eth_type_trans.

Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index bd44e28..0094562 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1694,6 +1694,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 
 	skb_scrub_packet(skb, true);
 	skb->protocol = eth_type_trans(skb, dev);
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
 	return 0;
 }
-- 
cgit v1.1


From 5f35227ea34bb616c436d9da47fc325866c428f3 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 23 Dec 2014 22:37:26 -0800
Subject: net: Generalize ndo_gso_check to ndo_features_check

GSO isn't the only offload feature with restrictions that
potentially can't be expressed with the current features mechanism.
Checksum is another although it's a general issue that could in
theory apply to anything. Even if it may be possible to
implement these restrictions in other ways, it can result in
duplicate code or inefficient per-packet behavior.

This generalizes ndo_gso_check so that drivers can remove any
features that don't make sense for a given packet, similar to
netif_skb_features(). It also converts existing driver
restrictions to the new format, completing the work that was
done to support tunnel protocols since the issues apply to
checksums as well.

By actually removing features from the set that are used to do
offloading, it solves another problem with the existing
interface. In these cases, GSO would run with the original set
of features and not do anything because it appears that
segmentation is not required.

CC: Tom Herbert <therbert@google.com>
CC: Joe Stringer <joestringer@nicira.com>
CC: Eric Dumazet <edumazet@google.com>
CC: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by:  Tom Herbert <therbert@google.com>
Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")
Tested-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0094562..683d493 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2563,7 +2563,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
-	const struct net_device *dev = skb->dev;
+	struct net_device *dev = skb->dev;
 	netdev_features_t features = dev->features;
 	u16 gso_segs = skb_shinfo(skb)->gso_segs;
 	__be16 protocol = skb->protocol;
@@ -2571,13 +2571,20 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
+	/* If encapsulation offload request, verify we are testing
+	 * hardware encapsulation features instead of standard
+	 * features for the netdev
+	 */
+	if (skb->encapsulation)
+		features &= dev->hw_enc_features;
+
 	if (!vlan_tx_tag_present(skb)) {
 		if (unlikely(protocol == htons(ETH_P_8021Q) ||
 			     protocol == htons(ETH_P_8021AD))) {
 			struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 			protocol = veh->h_vlan_encapsulated_proto;
 		} else {
-			return harmonize_features(skb, features);
+			goto finalize;
 		}
 	}
 
@@ -2595,6 +2602,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 						     NETIF_F_HW_VLAN_CTAG_TX |
 						     NETIF_F_HW_VLAN_STAG_TX);
 
+finalize:
+	if (dev->netdev_ops->ndo_features_check)
+		features &= dev->netdev_ops->ndo_features_check(skb, dev,
+								features);
+
 	return harmonize_features(skb, features);
 }
 EXPORT_SYMBOL(netif_skb_features);
@@ -2665,13 +2677,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (unlikely(!skb))
 		goto out_null;
 
-	/* If encapsulation offload request, verify we are testing
-	 * hardware encapsulation features instead of standard
-	 * features for the netdev
-	 */
-	if (skb->encapsulation)
-		features &= dev->hw_enc_features;
-
 	if (netif_needs_gso(dev, skb, features)) {
 		struct sk_buff *segs;
 
-- 
cgit v1.1


From 02c81ab95d8718d75886d16227a10cc7774493ea Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 22 Dec 2014 18:56:35 +0100
Subject: netlink: rename netlink_unbind() to netlink_undo_bind()

The new name is more expressive - this isn't a generic unbind
function but rather only a little undo helper for use only in
netlink_bind().

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 074cf3e..b4cf8ee 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1410,8 +1410,8 @@ static int netlink_realloc_groups(struct sock *sk)
 	return err;
 }
 
-static void netlink_unbind(int group, long unsigned int groups,
-			   struct netlink_sock *nlk)
+static void netlink_undo_bind(int group, long unsigned int groups,
+			      struct netlink_sock *nlk)
 {
 	int undo;
 
@@ -1461,7 +1461,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			err = nlk->netlink_bind(group);
 			if (!err)
 				continue;
-			netlink_unbind(group, groups, nlk);
+			netlink_undo_bind(group, groups, nlk);
 			return err;
 		}
 	}
@@ -1471,7 +1471,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			netlink_insert(sk, net, nladdr->nl_pid) :
 			netlink_autobind(sock);
 		if (err) {
-			netlink_unbind(nlk->ngroups, groups, nlk);
+			netlink_undo_bind(nlk->ngroups, groups, nlk);
 			return err;
 		}
 	}
-- 
cgit v1.1


From f8403a2e47afb37bcd3b7e286996d138a116c39d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 22 Dec 2014 18:56:36 +0100
Subject: genetlink: pass only network namespace to genl_has_listeners()

There's no point to force the caller to know about the internal
genl_sock to use inside struct net, just have them pass the network
namespace. This doesn't really change code generation since it's
an inline, but makes the caller less magic - there's never any
reason to pass another socket.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 332b5a0..4e9a5f0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -83,8 +83,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
 			    unsigned int group)
 {
 	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
-	       genl_has_listeners(family, genl_info_net(info)->genl_sock,
-				  group);
+	       genl_has_listeners(family, genl_info_net(info), group);
 }
 
 static void ovs_notify(struct genl_family *family,
-- 
cgit v1.1


From b10dcb3b94010e3ac3951f68789400b1665effb1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 22 Dec 2014 18:56:37 +0100
Subject: netlink: update listeners directly when removing socket

The code is now confusing to read - first in one function down
(netlink_remove) any group subscriptions are implicitly removed
by calling __sk_del_bind_node(), but the subscriber database is
only updated far later by calling netlink_update_listeners().

Move the latter call to just after removal from the list so it
is easier to follow the code.

This also enables moving the locking inside the kernel-socket
conditional, which improves the normal socket destruction path.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b4cf8ee..6a9fb7c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1091,8 +1091,10 @@ static void netlink_remove(struct sock *sk)
 	mutex_unlock(&nl_sk_hash_lock);
 
 	netlink_table_grab();
-	if (nlk_sk(sk)->subscriptions)
+	if (nlk_sk(sk)->subscriptions) {
 		__sk_del_bind_node(sk);
+		netlink_update_listeners(sk);
+	}
 	netlink_table_ungrab();
 }
 
@@ -1226,8 +1228,8 @@ static int netlink_release(struct socket *sock)
 
 	module_put(nlk->module);
 
-	netlink_table_grab();
 	if (netlink_is_kernel(sk)) {
+		netlink_table_grab();
 		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
 		if (--nl_table[sk->sk_protocol].registered == 0) {
 			struct listeners *old;
@@ -1241,10 +1243,8 @@ static int netlink_release(struct socket *sock)
 			nl_table[sk->sk_protocol].flags = 0;
 			nl_table[sk->sk_protocol].registered = 0;
 		}
-	} else if (nlk->subscriptions) {
-		netlink_update_listeners(sk);
+		netlink_table_ungrab();
 	}
-	netlink_table_ungrab();
 
 	kfree(nlk->groups);
 	nlk->groups = NULL;
-- 
cgit v1.1


From 7d68536bed72b09de03b07479dd707c5831b3b94 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 22 Dec 2014 18:56:38 +0100
Subject: netlink: call unbind when releasing socket

Currently, netlink_unbind() is only called when the socket
explicitly unbinds, which limits its usefulness (luckily
there are no users of it yet anyway.)

Call netlink_unbind() also when a socket is released, so it
becomes possible to track listeners with this callback and
without also implementing a netlink notifier (and checking
netlink_has_listeners() in there.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6a9fb7c..f29b63f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1246,6 +1246,13 @@ static int netlink_release(struct socket *sock)
 		netlink_table_ungrab();
 	}
 
+	if (nlk->netlink_unbind) {
+		int i;
+
+		for (i = 0; i < nlk->ngroups; i++)
+			if (test_bit(i, nlk->groups))
+				nlk->netlink_unbind(i + 1);
+	}
 	kfree(nlk->groups);
 	nlk->groups = NULL;
 
-- 
cgit v1.1


From c380d9a7afff0e4c2e5f3c1c2dc7d2f4214dd962 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 23 Dec 2014 20:54:40 +0100
Subject: genetlink: pass multicast bind/unbind to families

In order to make the newly fixed multicast bind/unbind
functionality in generic netlink, pass them down to the
appropriate family.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/genetlink.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'net')

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 76393f2..05bf40b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -983,11 +983,70 @@ static struct genl_multicast_group genl_ctrl_groups[] = {
 	{ .name = "notify", },
 };
 
+static int genl_bind(int group)
+{
+	int i, err;
+	bool found = false;
+
+	down_read(&cb_lock);
+	for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+		struct genl_family *f;
+
+		list_for_each_entry(f, genl_family_chain(i), family_list) {
+			if (group >= f->mcgrp_offset &&
+			    group < f->mcgrp_offset + f->n_mcgrps) {
+				int fam_grp = group - f->mcgrp_offset;
+
+				if (f->mcast_bind)
+					err = f->mcast_bind(fam_grp);
+				else
+					err = 0;
+				found = true;
+				break;
+			}
+		}
+	}
+	up_read(&cb_lock);
+
+	if (WARN_ON(!found))
+		err = 0;
+
+	return err;
+}
+
+static void genl_unbind(int group)
+{
+	int i;
+	bool found = false;
+
+	down_read(&cb_lock);
+	for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
+		struct genl_family *f;
+
+		list_for_each_entry(f, genl_family_chain(i), family_list) {
+			if (group >= f->mcgrp_offset &&
+			    group < f->mcgrp_offset + f->n_mcgrps) {
+				int fam_grp = group - f->mcgrp_offset;
+
+				if (f->mcast_unbind)
+					f->mcast_unbind(fam_grp);
+				found = true;
+				break;
+			}
+		}
+	}
+	up_read(&cb_lock);
+
+	WARN_ON(!found);
+}
+
 static int __net_init genl_pernet_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
 		.input		= genl_rcv,
 		.flags		= NL_CFG_F_NONROOT_RECV,
+		.bind		= genl_bind,
+		.unbind		= genl_unbind,
 	};
 
 	/* we'll bump the group number right afterwards */
-- 
cgit v1.1


From 023e2cfa36c31b0ad28c159a1bb0d61ff57334c8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 23 Dec 2014 21:00:06 +0100
Subject: netlink/genetlink: pass network namespace to bind/unbind

Netlink families can exist in multiple namespaces, and for the most
part multicast subscriptions are per network namespace. Thus it only
makes sense to have bind/unbind notifications per network namespace.

To achieve this, pass the network namespace of a given client socket
to the bind/unbind functions.

Also do this in generic netlink, and there also make sure that any
bind for multicast groups that only exist in init_net is rejected.
This isn't really a problem if it is accepted since a client in a
different namespace will never receive any notifications from such
a group, but it can confuse the family if not rejected (it's also
possible to silently (without telling the family) accept it, but it
would also have to be ignored on unbind so families that take any
kind of action on bind/unbind won't do unnecessary work for invalid
clients like that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c |  2 +-
 net/netlink/af_netlink.c  | 21 +++++++++++----------
 net/netlink/af_netlink.h  |  8 ++++----
 net/netlink/genetlink.c   | 12 +++++++-----
 4 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 13c2e17..cde4a67 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -463,7 +463,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 }
 
 #ifdef CONFIG_MODULES
-static int nfnetlink_bind(int group)
+static int nfnetlink_bind(struct net *net, int group)
 {
 	const struct nfnetlink_subsystem *ss;
 	int type;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f29b63f..84ea76c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1141,8 +1141,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 	struct module *module = NULL;
 	struct mutex *cb_mutex;
 	struct netlink_sock *nlk;
-	int (*bind)(int group);
-	void (*unbind)(int group);
+	int (*bind)(struct net *net, int group);
+	void (*unbind)(struct net *net, int group);
 	int err = 0;
 
 	sock->state = SS_UNCONNECTED;
@@ -1251,7 +1251,7 @@ static int netlink_release(struct socket *sock)
 
 		for (i = 0; i < nlk->ngroups; i++)
 			if (test_bit(i, nlk->groups))
-				nlk->netlink_unbind(i + 1);
+				nlk->netlink_unbind(sock_net(sk), i + 1);
 	}
 	kfree(nlk->groups);
 	nlk->groups = NULL;
@@ -1418,8 +1418,9 @@ static int netlink_realloc_groups(struct sock *sk)
 }
 
 static void netlink_undo_bind(int group, long unsigned int groups,
-			      struct netlink_sock *nlk)
+			      struct sock *sk)
 {
+	struct netlink_sock *nlk = nlk_sk(sk);
 	int undo;
 
 	if (!nlk->netlink_unbind)
@@ -1427,7 +1428,7 @@ static void netlink_undo_bind(int group, long unsigned int groups,
 
 	for (undo = 0; undo < group; undo++)
 		if (test_bit(undo, &groups))
-			nlk->netlink_unbind(undo);
+			nlk->netlink_unbind(sock_net(sk), undo);
 }
 
 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
@@ -1465,10 +1466,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		for (group = 0; group < nlk->ngroups; group++) {
 			if (!test_bit(group, &groups))
 				continue;
-			err = nlk->netlink_bind(group);
+			err = nlk->netlink_bind(net, group);
 			if (!err)
 				continue;
-			netlink_undo_bind(group, groups, nlk);
+			netlink_undo_bind(group, groups, sk);
 			return err;
 		}
 	}
@@ -1478,7 +1479,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			netlink_insert(sk, net, nladdr->nl_pid) :
 			netlink_autobind(sock);
 		if (err) {
-			netlink_undo_bind(nlk->ngroups, groups, nlk);
+			netlink_undo_bind(nlk->ngroups, groups, sk);
 			return err;
 		}
 	}
@@ -2129,7 +2130,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		if (!val || val - 1 >= nlk->ngroups)
 			return -EINVAL;
 		if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
-			err = nlk->netlink_bind(val);
+			err = nlk->netlink_bind(sock_net(sk), val);
 			if (err)
 				return err;
 		}
@@ -2138,7 +2139,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 					 optname == NETLINK_ADD_MEMBERSHIP);
 		netlink_table_ungrab();
 		if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
-			nlk->netlink_unbind(val);
+			nlk->netlink_unbind(sock_net(sk), val);
 
 		err = 0;
 		break;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index b20a173..f123a88 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -39,8 +39,8 @@ struct netlink_sock {
 	struct mutex		*cb_mutex;
 	struct mutex		cb_def_mutex;
 	void			(*netlink_rcv)(struct sk_buff *skb);
-	int			(*netlink_bind)(int group);
-	void			(*netlink_unbind)(int group);
+	int			(*netlink_bind)(struct net *net, int group);
+	void			(*netlink_unbind)(struct net *net, int group);
 	struct module		*module;
 #ifdef CONFIG_NETLINK_MMAP
 	struct mutex		pg_vec_lock;
@@ -65,8 +65,8 @@ struct netlink_table {
 	unsigned int		groups;
 	struct mutex		*cb_mutex;
 	struct module		*module;
-	int			(*bind)(int group);
-	void			(*unbind)(int group);
+	int			(*bind)(struct net *net, int group);
+	void			(*unbind)(struct net *net, int group);
 	bool			(*compare)(struct net *net, struct sock *sock);
 	int			registered;
 };
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 05bf40b..91566ed 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -983,7 +983,7 @@ static struct genl_multicast_group genl_ctrl_groups[] = {
 	{ .name = "notify", },
 };
 
-static int genl_bind(int group)
+static int genl_bind(struct net *net, int group)
 {
 	int i, err;
 	bool found = false;
@@ -997,8 +997,10 @@ static int genl_bind(int group)
 			    group < f->mcgrp_offset + f->n_mcgrps) {
 				int fam_grp = group - f->mcgrp_offset;
 
-				if (f->mcast_bind)
-					err = f->mcast_bind(fam_grp);
+				if (!f->netnsok && net != &init_net)
+					err = -ENOENT;
+				else if (f->mcast_bind)
+					err = f->mcast_bind(net, fam_grp);
 				else
 					err = 0;
 				found = true;
@@ -1014,7 +1016,7 @@ static int genl_bind(int group)
 	return err;
 }
 
-static void genl_unbind(int group)
+static void genl_unbind(struct net *net, int group)
 {
 	int i;
 	bool found = false;
@@ -1029,7 +1031,7 @@ static void genl_unbind(int group)
 				int fam_grp = group - f->mcgrp_offset;
 
 				if (f->mcast_unbind)
-					f->mcast_unbind(fam_grp);
+					f->mcast_unbind(net, fam_grp);
 				found = true;
 				break;
 			}
-- 
cgit v1.1


From dc97a1a9477f969e34b38ca9d9cd231cb93ebea2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 29 Dec 2014 16:31:49 -0500
Subject: genetlink: A genl_bind() to an out-of-range multicast group should
 not WARN().

Users can request to bind to arbitrary multicast groups, so warning
when the requested group number is out of range is not appropriate.

And with the warning removed, and the 'err' variable properly given
an initial value, we can remove 'found' altogether.

Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/genetlink.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 91566ed..2e11061e 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -985,8 +985,7 @@ static struct genl_multicast_group genl_ctrl_groups[] = {
 
 static int genl_bind(struct net *net, int group)
 {
-	int i, err;
-	bool found = false;
+	int i, err = 0;
 
 	down_read(&cb_lock);
 	for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
@@ -1003,16 +1002,12 @@ static int genl_bind(struct net *net, int group)
 					err = f->mcast_bind(net, fam_grp);
 				else
 					err = 0;
-				found = true;
 				break;
 			}
 		}
 	}
 	up_read(&cb_lock);
 
-	if (WARN_ON(!found))
-		err = 0;
-
 	return err;
 }
 
-- 
cgit v1.1