From 734907e82d21a75a514b80164185427a832a00c0 Mon Sep 17 00:00:00 2001
From: Rich Lane <rlane@bigswitch.com>
Date: Fri, 8 Feb 2013 09:30:23 -0800
Subject: openvswitch: Fix ovs_vport_cmd_del return value on success

If the pointer does not represent an error then the PTR_ERR macro may still
return a nonzero value. The fix is the same as in ovs_vport_cmd_set.

Signed-off-by: Rich Lane <rlane@bigswitch.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f996db3..5e275b9 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1772,6 +1772,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(reply))
 		goto exit_unlock;
 
+	err = 0;
 	ovs_dp_detach_port(vport);
 
 	genl_notify(reply, genl_info_net(info), info->snd_portid,
-- 
cgit v1.1


From cb7c5bdffb727a3d4dea5247d9d1d52238b01d90 Mon Sep 17 00:00:00 2001
From: Rich Lane <rlane@bigswitch.com>
Date: Fri, 8 Feb 2013 13:18:01 -0800
Subject: openvswitch: Fix ovs_vport_cmd_new return value on success

If the pointer does not represent an error then the PTR_ERR
macro may still return a nonzero value.

Signed-off-by: Rich Lane <rlane@bigswitch.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 5e275b9..a2cd3e6 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1691,6 +1691,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(vport))
 		goto exit_unlock;
 
+	err = 0;
 	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
 					 OVS_VPORT_CMD_NEW);
 	if (IS_ERR(reply)) {
-- 
cgit v1.1


From a15ff76c955d17cf58313097e4a24124da022b1d Mon Sep 17 00:00:00 2001
From: Rich Lane <rlane@bigswitch.com>
Date: Fri, 15 Feb 2013 11:07:43 -0800
Subject: openvswitch: Call genlmsg_end in queue_userspace_packet

Without genlmsg_end the upcall message ends (according to nlmsg_len)
after the struct ovs_header.

Signed-off-by: Rich Lane <rlane@bigswitch.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a2cd3e6..cae1062 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -395,6 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 
 	skb_copy_and_csum_dev(skb, nla_data(nla));
 
+	genlmsg_end(user_skb, upcall);
 	err = genlmsg_unicast(net, user_skb, upcall_info->portid);
 
 out:
-- 
cgit v1.1


From 17b682a04841233f827073b327c6533e478dfcd4 Mon Sep 17 00:00:00 2001
From: Rich Lane <rlane@bigswitch.com>
Date: Tue, 19 Feb 2013 11:10:30 -0800
Subject: openvswitch: Fix parsing invalid LLC/SNAP ethertypes

Before this patch, if an LLC/SNAP packet with OUI 00:00:00 had an
ethertype less than 1536 the flow key given to userspace in the upcall
would contain the invalid ethertype (for example, 3). If userspace
attempted to insert a kernel flow for this key it would be rejected
by ovs_flow_from_nlattrs.

This patch allows OVS to pass the OFTest pktact.DirectBadLlcPackets.

Signed-off-by: Rich Lane <rlane@bigswitch.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/flow.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index c3294ce..0c98d40 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -484,7 +484,11 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 		return htons(ETH_P_802_2);
 
 	__skb_pull(skb, sizeof(struct llc_snap_hdr));
-	return llc->ethertype;
+
+	if (ntohs(llc->ethertype) >= 1536)
+		return llc->ethertype;
+
+	return htons(ETH_P_802_2);
 }
 
 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
-- 
cgit v1.1


From 7b024082b2b279af58e24ebd46e81777723d58da Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Fri, 22 Feb 2013 17:32:26 +0800
Subject: openvswitch: fix the calculation of checksum for vlan header

In vlan_insert_tag(), we insert a 4-byte VLAN header _after_
mac header:

        memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
        ...
        veth->h_vlan_proto = htons(ETH_P_8021Q);
        ...
        veth->h_vlan_TCI = htons(vlan_tci);

so after it, we should recompute the checksum to include these 4 bytes.
skb->data still points to the mac header, therefore VLAN header is at
(2 * ETH_ALEN = 12) bytes after it, not (ETH_HLEN = 14) bytes.

This can also be observed via tcpdump:

         0x0000:  ffff ffff ffff 5254 005d 6f6e 8100 000a
         0x0010:  0806 0001 0800 0604 0001 5254 005d 6f6e
         0x0020:  c0a8 026e 0000 0000 0000 c0a8 0282

Similar for __pop_vlan_tci(), the vlan header we remove is the one
overwritten in:

	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);

Therefore the VLAN_HLEN = 4 bytes after 2 * ETH_ALEN is the part
we want to sub from checksum.

Cc: David S. Miller <davem@davemloft.net>
Cc: Jesse Gross <jesse@nicira.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/actions.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ac2defe..d4d5363 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -58,7 +58,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
-					+ ETH_HLEN, VLAN_HLEN, 0));
+					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
 
 	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
 	*current_tci = vhdr->h_vlan_TCI;
@@ -115,7 +115,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
 			skb->csum = csum_add(skb->csum, csum_partial(skb->data
-					+ ETH_HLEN, VLAN_HLEN, 0));
+					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
 
 	}
 	__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
-- 
cgit v1.1


From d176ca2a48ff2b5d7becfacdcbd1d72c73bd22d1 Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Fri, 22 Feb 2013 19:41:26 +0800
Subject: openvswitch: remove some useless comments

These comments are useless in upstream kernel.

Cc: David S. Miller <davem@davemloft.net>
Cc: Jesse Gross <jesse@nicira.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/vport-netdev.c | 3 +--
 net/openvswitch/vport.c        | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 670cbc3..2130d61 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -43,8 +43,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
 
 	/* Make our own copy of the packet.  Otherwise we will mangle the
 	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
-	 * (No one comes after us, since we tell handle_bridge() that we took
-	 * the packet.) */
+	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(!skb))
 		return;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 70af0be..6255e48 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -326,8 +326,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
  * @skb: skb that was received
  *
  * Must be called with rcu_read_lock.  The packet cannot be shared and
- * skb->data should point to the Ethernet header.  The caller must have already
- * called compute_ip_summed() to initialize the checksumming fields.
+ * skb->data should point to the Ethernet header.
  */
 void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
 {
-- 
cgit v1.1


From b141e811a0763bb107af4cd99d456193ccdb8053 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Thu, 21 Feb 2013 11:04:45 +0100
Subject: NFC: llcp: Decrease socket ack log when accepting a connection

This is really difficult to test with real NFC devices, but without
this fix an LLCP server will eventually refuse new connections.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 net/nfc/llcp/sock.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index 5332751..5c7cdf3f 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -278,6 +278,8 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
 
 			pr_debug("Returning sk state %d\n", sk->sk_state);
 
+			sk_acceptq_removed(parent);
+
 			return sk;
 		}
 
-- 
cgit v1.1


From 3536da06db0baa675f32de608c0a4c0f5ef0e9ff Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Thu, 21 Feb 2013 15:40:04 +0100
Subject: NFC: llcp: Clean local timers and works when removing a device

Whenever an adapter is removed we must clean all the local structures,
especially the timers and scheduled work. Otherwise those asynchronous
threads will eventually try to access the freed nfc_dev pointer if an LLCP
link is up.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 net/nfc/llcp/llcp.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index 7f8266d..77e1d97 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -142,20 +142,25 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
 	return local;
 }
 
-static void local_release(struct kref *ref)
+static void local_cleanup(struct nfc_llcp_local *local, bool listen)
 {
-	struct nfc_llcp_local *local;
-
-	local = container_of(ref, struct nfc_llcp_local, ref);
-
-	list_del(&local->list);
-	nfc_llcp_socket_release(local, false);
+	nfc_llcp_socket_release(local, listen);
 	del_timer_sync(&local->link_timer);
 	skb_queue_purge(&local->tx_queue);
 	cancel_work_sync(&local->tx_work);
 	cancel_work_sync(&local->rx_work);
 	cancel_work_sync(&local->timeout_work);
 	kfree_skb(local->rx_pending);
+}
+
+static void local_release(struct kref *ref)
+{
+	struct nfc_llcp_local *local;
+
+	local = container_of(ref, struct nfc_llcp_local, ref);
+
+	list_del(&local->list);
+	local_cleanup(local, false);
 	kfree(local);
 }
 
@@ -1427,6 +1432,8 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev)
 		return;
 	}
 
+	local_cleanup(local, false);
+
 	nfc_llcp_local_put(local);
 }
 
-- 
cgit v1.1


From e6a3a4bb856a6fba551b43376c80f45836132710 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Thu, 21 Feb 2013 16:33:30 +0100
Subject: NFC: llcp: Clean raw sockets from nfc_llcp_socket_release

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 net/nfc/llcp/llcp.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'net')

diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index 77e1d97..8a35423 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -133,6 +133,35 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
 	}
 
 	write_unlock(&local->sockets.lock);
+
+	/*
+	 * If we want to keep the listening sockets alive,
+	 * we don't touch the RAW ones.
+	 */
+	if (listen == true)
+		return;
+
+	write_lock(&local->raw_sockets.lock);
+
+	sk_for_each_safe(sk, tmp, &local->raw_sockets.head) {
+		llcp_sock = nfc_llcp_sock(sk);
+
+		bh_lock_sock(sk);
+
+		nfc_llcp_socket_purge(llcp_sock);
+
+		sk->sk_state = LLCP_CLOSED;
+
+		sk->sk_state_change(sk);
+
+		bh_unlock_sock(sk);
+
+		sock_orphan(sk);
+
+		sk_del_node_init(sk);
+	}
+
+	write_unlock(&local->raw_sockets.lock);
 }
 
 struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
-- 
cgit v1.1


From 3bbc0ceb7ac2bf694d31362eea2c71a680e5deeb Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Thu, 21 Feb 2013 17:01:06 +0100
Subject: NFC: llcp: Report error to pending sockets when a device is removed

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 net/nfc/llcp/llcp.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index 8a35423..b530afa 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -68,7 +68,8 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)
 	}
 }
 
-static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
+static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
+				    int err)
 {
 	struct sock *sk;
 	struct hlist_node *tmp;
@@ -100,7 +101,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
 
 				nfc_llcp_accept_unlink(accept_sk);
 
+				if (err)
+					accept_sk->sk_err = err;
 				accept_sk->sk_state = LLCP_CLOSED;
+				accept_sk->sk_state_change(sk);
 
 				bh_unlock_sock(accept_sk);
 
@@ -123,7 +127,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
 			continue;
 		}
 
+		if (err)
+			sk->sk_err = err;
 		sk->sk_state = LLCP_CLOSED;
+		sk->sk_state_change(sk);
 
 		bh_unlock_sock(sk);
 
@@ -150,8 +157,9 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
 
 		nfc_llcp_socket_purge(llcp_sock);
 
+		if (err)
+			sk->sk_err = err;
 		sk->sk_state = LLCP_CLOSED;
-
 		sk->sk_state_change(sk);
 
 		bh_unlock_sock(sk);
@@ -173,7 +181,7 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
 
 static void local_cleanup(struct nfc_llcp_local *local, bool listen)
 {
-	nfc_llcp_socket_release(local, listen);
+	nfc_llcp_socket_release(local, listen, ENXIO);
 	del_timer_sync(&local->link_timer);
 	skb_queue_purge(&local->tx_queue);
 	cancel_work_sync(&local->tx_work);
@@ -1382,7 +1390,7 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev)
 		return;
 
 	/* Close and purge all existing sockets */
-	nfc_llcp_socket_release(local, true);
+	nfc_llcp_socket_release(local, true, 0);
 }
 
 void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
-- 
cgit v1.1


From b47506d91259c29b9c75c404737eb6525556f9b4 Mon Sep 17 00:00:00 2001
From: Marek Lindner <lindner_marek@yahoo.de>
Date: Mon, 4 Mar 2013 10:39:49 +0800
Subject: batman-adv: verify tt len does not exceed packet len

batadv_iv_ogm_process() accesses the packet using the tt_num_changes
attribute regardless of the real packet len (assuming the length check
was done before). Therefore a length check is needed to avoid reading
random memory.

Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/bat_iv_ogm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a0b253e..a5bb0a76 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1288,7 +1288,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
 	batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff;
 
 	/* unpack the aggregated packets and process them one by one */
-	do {
+	while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len,
+					 batadv_ogm_packet->tt_num_changes)) {
 		tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN;
 
 		batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff,
@@ -1299,8 +1300,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
 
 		packet_pos = packet_buff + buff_pos;
 		batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
-	} while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len,
-					   batadv_ogm_packet->tt_num_changes));
+	}
 
 	kfree_skb(skb);
 	return NET_RX_SUCCESS;
-- 
cgit v1.1


From 4660c7f498c07c43173142ea95145e9dac5a6d14 Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@ll.mit.edu>
Date: Mon, 11 Mar 2013 10:43:39 +0000
Subject: net/ipv4: Ensure that location of timestamp option is stored

This is needed in order to detect if the timestamp option appears
more than once in a packet, to remove the option if the packet is
fragmented, etc. My previous change neglected to store the option
location when the router addresses were prespecified and Pointer >
Length. But now the option location is also stored when Flag is an
unrecognized value, to ensure these option handling behaviors are
still performed.

Signed-off-by: David Ward <david.ward@ll.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_options.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 310a364..ec72645 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -370,7 +370,6 @@ int ip_options_compile(struct net *net,
 				}
 				switch (optptr[3]&0xF) {
 				      case IPOPT_TS_TSONLY:
-					opt->ts = optptr - iph;
 					if (skb)
 						timeptr = &optptr[optptr[2]-1];
 					opt->ts_needtime = 1;
@@ -381,7 +380,6 @@ int ip_options_compile(struct net *net,
 						pp_ptr = optptr + 2;
 						goto error;
 					}
-					opt->ts = optptr - iph;
 					if (rt)  {
 						spec_dst_fill(&spec_dst, skb);
 						memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
@@ -396,7 +394,6 @@ int ip_options_compile(struct net *net,
 						pp_ptr = optptr + 2;
 						goto error;
 					}
-					opt->ts = optptr - iph;
 					{
 						__be32 addr;
 						memcpy(&addr, &optptr[optptr[2]-1], 4);
@@ -429,12 +426,12 @@ int ip_options_compile(struct net *net,
 					pp_ptr = optptr + 3;
 					goto error;
 				}
-				opt->ts = optptr - iph;
 				if (skb) {
 					optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4);
 					opt->is_changed = 1;
 				}
 			}
+			opt->ts = optptr - iph;
 			break;
 		      case IPOPT_RA:
 			if (optlen < 4) {
-- 
cgit v1.1


From 3da889b616164bde76a37350cf28e0d17a94e979 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Mon, 11 Mar 2013 13:52:17 +0000
Subject: bridge: reserve space for IFLA_BRPORT_FAST_LEAVE

The bridge multicast fast leave feature was added sufficient space
was not reserved in the netlink message. This means the flag may be
lost in netlink events and results of queries.

Found by observation while looking up some netlink stuff for discussion with Vlad.
Problem introduced by commit c2d3babfafbb9f6629cfb47139758e59a5eb0d80
Author: David S. Miller <davem@davemloft.net>
Date:   Wed Dec 5 16:24:45 2012 -0500

    bridge: implement multicast fast leave

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 27aa3ee..db12a0f 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -29,6 +29,7 @@ static inline size_t br_port_info_size(void)
 		+ nla_total_size(1)	/* IFLA_BRPORT_MODE */
 		+ nla_total_size(1)	/* IFLA_BRPORT_GUARD */
 		+ nla_total_size(1)	/* IFLA_BRPORT_PROTECT */
+		+ nla_total_size(1)	/* IFLA_BRPORT_FAST_LEAVE */
 		+ 0;
 }
 
-- 
cgit v1.1


From c80a8512ee3a8e1f7c3704140ea55f21dc6bd651 Mon Sep 17 00:00:00 2001
From: Li RongQing <roy.qing.li@gmail.com>
Date: Mon, 11 Mar 2013 20:30:44 +0000
Subject: net/core: move vlan_depth out of while loop in skb_network_protocol()

[ Bug added added in commit 05e8ef4ab2d8087d (net: factor out
  skb_mac_gso_segment() from skb_gso_segment() ) ]

move vlan_depth out of while loop, or else vlan_depth always is ETH_HLEN,
can not be increased, and lead to infinite loop when frame has two vlan headers.

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index dffbef7..d540ced 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2219,9 +2219,9 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
+	int vlan_depth = ETH_HLEN;
 
 	while (type == htons(ETH_P_8021Q)) {
-		int vlan_depth = ETH_HLEN;
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
-- 
cgit v1.1


From f2815633504b442ca0b0605c16bf3d88a3a0fcea Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Tue, 12 Mar 2013 15:53:23 +0000
Subject: sctp: Use correct sideffect command in duplicate cookie handling

When SCTP is done processing a duplicate cookie chunk, it tries
to delete a newly created association.  For that, it has to set
the right association for the side-effect processing to work.
However, when it uses the SCTP_CMD_NEW_ASOC command, that performs
more work then really needed (like hashing the associationa and
assigning it an id) and there is no point to do that only to
delete the association as a next step.  In fact, it also creates
an impossible condition where an association may be found by
the getsockopt() call, and that association is empty.  This
causes a crash in some sctp getsockopts.

The solution is rather simple.  We simply use SCTP_CMD_SET_ASOC
command that doesn't have all the overhead and does exactly
what we need.

Reported-by: Karl Heiss <kheiss@gmail.com>
Tested-by: Karl Heiss <kheiss@gmail.com>
CC: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5131fcf..de1a013 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2082,7 +2082,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
 	}
 
 	/* Delete the tempory new association. */
-	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
+	sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc));
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 
 	/* Restore association pointer to provide SCTP command interpeter
-- 
cgit v1.1


From 2317f449af30073cfa6ec8352e4a65a89e357bdd Mon Sep 17 00:00:00 2001
From: Xufeng Zhang <xufeng.zhang@windriver.com>
Date: Thu, 7 Mar 2013 21:39:37 +0000
Subject: sctp: don't break the loop while meeting the active_path so as to
 find the matched transport

sctp_assoc_lookup_tsn() function searchs which transport a certain TSN
was sent on, if not found in the active_path transport, then go search
all the other transports in the peer's transport_addr_list, however, we
should continue to the next entry rather than break the loop when meet
the active_path transport.

Signed-off-by: Xufeng Zhang <xufeng.zhang@windriver.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 43cd0dd..d2709e2 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1079,7 +1079,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc,
 			transports) {
 
 		if (transport == active)
-			break;
+			continue;
 		list_for_each_entry(chunk, &transport->transmitted,
 				transmitted_list) {
 			if (key == chunk->subh.data_hdr->tsn) {
-- 
cgit v1.1


From 16fad69cfe4adbbfa813de516757b87bcae36d93 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 14 Mar 2013 05:40:32 +0000
Subject: tcp: fix skb_availroom()

Chrome OS team reported a crash on a Pixel ChromeBook in TCP stack :

https://code.google.com/p/chromium/issues/detail?id=182056

commit a21d45726acac (tcp: avoid order-1 allocations on wifi and tx
path) did a poor choice adding an 'avail_size' field to skb, while
what we really needed was a 'reserved_tailroom' one.

It would have avoided commit 22b4a4f22da (tcp: fix retransmit of
partially acked frames) and this commit.

Crash occurs because skb_split() is not aware of the 'avail_size'
management (and should not be aware)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Mukesh Agrawal <quiche@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c        | 2 +-
 net/ipv4/tcp_output.c | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 47e854f..e220207 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -775,7 +775,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 			 * Make sure that we have exactly size bytes
 			 * available to the caller, no more, no less.
 			 */
-			skb->avail_size = size;
+			skb->reserved_tailroom = skb->end - skb->tail - size;
 			return skb;
 		}
 		__kfree_skb(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e2b4461..817fbb3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1298,7 +1298,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 	eat = min_t(int, len, skb_headlen(skb));
 	if (eat) {
 		__skb_pull(skb, eat);
-		skb->avail_size -= eat;
 		len -= eat;
 		if (!len)
 			return;
-- 
cgit v1.1


From eb20ff9c91ddcb2d55c1849a87d3db85af5e88a9 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Date: Wed, 13 Mar 2013 19:46:20 -0300
Subject: Bluetooth: Fix not closing SCO sockets in the BT_CONNECT2 state

With deferred setup for SCO, it is possible that userspace closes the
socket when it is in the BT_CONNECT2 state, after the Connect Request is
received but before the Accept Synchonous Connection is sent.

If this happens the following crash was observed, when the connection is
terminated:

[  +0.000003] hci_sync_conn_complete_evt: hci0 status 0x10
[  +0.000005] sco_connect_cfm: hcon ffff88003d1bd800 bdaddr 40:98:4e:32:d7:39 status 16
[  +0.000003] sco_conn_del: hcon ffff88003d1bd800 conn ffff88003cc8e300, err 110
[  +0.000015] BUG: unable to handle kernel NULL pointer dereference at 0000000000000199
[  +0.000906] IP: [<ffffffff810620dd>] __lock_acquire+0xed/0xe82
[  +0.000000] PGD 3d21f067 PUD 3d291067 PMD 0
[  +0.000000] Oops: 0002 [#1] SMP
[  +0.000000] Modules linked in: rfcomm bnep btusb bluetooth
[  +0.000000] CPU 0
[  +0.000000] Pid: 1481, comm: kworker/u:2H Not tainted 3.9.0-rc1-25019-gad82cdd #1 Bochs Bochs
[  +0.000000] RIP: 0010:[<ffffffff810620dd>]  [<ffffffff810620dd>] __lock_acquire+0xed/0xe82
[  +0.000000] RSP: 0018:ffff88003c3c19d8  EFLAGS: 00010002
[  +0.000000] RAX: 0000000000000001 RBX: 0000000000000246 RCX: 0000000000000000
[  +0.000000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003d1be868
[  +0.000000] RBP: ffff88003c3c1a98 R08: 0000000000000002 R09: 0000000000000000
[  +0.000000] R10: ffff88003d1be868 R11: ffff88003e20b000 R12: 0000000000000002
[  +0.000000] R13: ffff88003aaa8000 R14: 000000000000006e R15: ffff88003d1be850
[  +0.000000] FS:  0000000000000000(0000) GS:ffff88003e200000(0000) knlGS:0000000000000000
[  +0.000000] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  +0.000000] CR2: 0000000000000199 CR3: 000000003c1cb000 CR4: 00000000000006b0
[  +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  +0.000000] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  +0.000000] Process kworker/u:2H (pid: 1481, threadinfo ffff88003c3c0000, task ffff88003aaa8000)
[  +0.000000] Stack:
[  +0.000000]  ffffffff81b16342 0000000000000000 0000000000000000 ffff88003d1be868
[  +0.000000]  ffffffff00000000 00018c0c7863e367 000000003c3c1a28 ffffffff8101efbd
[  +0.000000]  0000000000000000 ffff88003e3d2400 ffff88003c3c1a38 ffffffff81007c7a
[  +0.000000] Call Trace:
[  +0.000000]  [<ffffffff8101efbd>] ? kvm_clock_read+0x34/0x3b
[  +0.000000]  [<ffffffff81007c7a>] ? paravirt_sched_clock+0x9/0xd
[  +0.000000]  [<ffffffff81007fd4>] ? sched_clock+0x9/0xb
[  +0.000000]  [<ffffffff8104fd7a>] ? sched_clock_local+0x12/0x75
[  +0.000000]  [<ffffffff810632d1>] lock_acquire+0x93/0xb1
[  +0.000000]  [<ffffffffa0022339>] ? spin_lock+0x9/0xb [bluetooth]
[  +0.000000]  [<ffffffff8105f3d8>] ? lock_release_holdtime.part.22+0x4e/0x55
[  +0.000000]  [<ffffffff814f6038>] _raw_spin_lock+0x40/0x74
[  +0.000000]  [<ffffffffa0022339>] ? spin_lock+0x9/0xb [bluetooth]
[  +0.000000]  [<ffffffff814f6936>] ? _raw_spin_unlock+0x23/0x36
[  +0.000000]  [<ffffffffa0022339>] spin_lock+0x9/0xb [bluetooth]
[  +0.000000]  [<ffffffffa00230cc>] sco_conn_del+0x76/0xbb [bluetooth]
[  +0.000000]  [<ffffffffa002391d>] sco_connect_cfm+0x2da/0x2e9 [bluetooth]
[  +0.000000]  [<ffffffffa000862a>] hci_proto_connect_cfm+0x38/0x65 [bluetooth]
[  +0.000000]  [<ffffffffa0008d30>] hci_sync_conn_complete_evt.isra.79+0x11a/0x13e [bluetooth]
[  +0.000000]  [<ffffffffa000cd96>] hci_event_packet+0x153b/0x239d [bluetooth]
[  +0.000000]  [<ffffffff814f68ff>] ? _raw_spin_unlock_irqrestore+0x48/0x5c
[  +0.000000]  [<ffffffffa00025f6>] hci_rx_work+0xf3/0x2e3 [bluetooth]
[  +0.000000]  [<ffffffff8103efed>] process_one_work+0x1dc/0x30b
[  +0.000000]  [<ffffffff8103ef83>] ? process_one_work+0x172/0x30b
[  +0.000000]  [<ffffffff8103e07f>] ? spin_lock_irq+0x9/0xb
[  +0.000000]  [<ffffffff8103fc8d>] worker_thread+0x123/0x1d2
[  +0.000000]  [<ffffffff8103fb6a>] ? manage_workers+0x240/0x240
[  +0.000000]  [<ffffffff81044211>] kthread+0x9d/0xa5
[  +0.000000]  [<ffffffff81044174>] ? __kthread_parkme+0x60/0x60
[  +0.000000]  [<ffffffff814f75bc>] ret_from_fork+0x7c/0xb0
[  +0.000000]  [<ffffffff81044174>] ? __kthread_parkme+0x60/0x60
[  +0.000000] Code: d7 44 89 8d 50 ff ff ff 4c 89 95 58 ff ff ff e8 44 fc ff ff 44 8b 8d 50 ff ff ff 48 85 c0 4c 8b 95 58 ff ff ff 0f 84 7a 04 00 00 <f0> ff 80 98 01 00 00 83 3d 25 41 a7 00 00 45 8b b5 e8 05 00 00
[  +0.000000] RIP  [<ffffffff810620dd>] __lock_acquire+0xed/0xe82
[  +0.000000]  RSP <ffff88003c3c19d8>
[  +0.000000] CR2: 0000000000000199
[  +0.000000] ---[ end trace e73cd3b52352dd34 ]---

Cc: stable@vger.kernel.org [3.8]
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Tested-by: Frederic Dalleau <frederic.dalleau@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 net/bluetooth/sco.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 57f250c..aaf1957 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -361,6 +361,7 @@ static void __sco_sock_close(struct sock *sk)
 			sco_chan_del(sk, ECONNRESET);
 		break;
 
+	case BT_CONNECT2:
 	case BT_CONNECT:
 	case BT_DISCONN:
 		sco_chan_del(sk, ECONNRESET);
-- 
cgit v1.1


From 0d98da5d845e0d0293055913ce65c9904b3b902a Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Thu, 7 Mar 2013 17:20:46 +0000
Subject: netfilter: nf_conntrack: register pernet subsystem before register L4
 proto

In (c296bb4 netfilter: nf_conntrack: refactor l4proto support for netns)
the l4proto gre/dccp/udplite/sctp registration happened before the pernet
subsystem, which is wrong.

Register pernet subsystem before register L4proto since after register
L4proto, init_conntrack may try to access the resources which allocated
in register_pernet_subsys.

Reported-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_dccp.c    | 12 ++++++------
 net/netfilter/nf_conntrack_proto_gre.c     | 12 ++++++------
 net/netfilter/nf_conntrack_proto_sctp.c    | 12 ++++++------
 net/netfilter/nf_conntrack_proto_udplite.c | 12 ++++++------
 4 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 432f957..ba65b20 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -969,6 +969,10 @@ static int __init nf_conntrack_proto_dccp_init(void)
 {
 	int ret;
 
+	ret = register_pernet_subsys(&dccp_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
 	ret = nf_ct_l4proto_register(&dccp_proto4);
 	if (ret < 0)
 		goto out_dccp4;
@@ -977,16 +981,12 @@ static int __init nf_conntrack_proto_dccp_init(void)
 	if (ret < 0)
 		goto out_dccp6;
 
-	ret = register_pernet_subsys(&dccp_net_ops);
-	if (ret < 0)
-		goto out_pernet;
-
 	return 0;
-out_pernet:
-	nf_ct_l4proto_unregister(&dccp_proto6);
 out_dccp6:
 	nf_ct_l4proto_unregister(&dccp_proto4);
 out_dccp4:
+	unregister_pernet_subsys(&dccp_net_ops);
+out_pernet:
 	return ret;
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index bd7d01d..155ce9f 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -420,18 +420,18 @@ static int __init nf_ct_proto_gre_init(void)
 {
 	int ret;
 
-	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4);
-	if (ret < 0)
-		goto out_gre4;
-
 	ret = register_pernet_subsys(&proto_gre_net_ops);
 	if (ret < 0)
 		goto out_pernet;
 
+	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4);
+	if (ret < 0)
+		goto out_gre4;
+
 	return 0;
-out_pernet:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4);
 out_gre4:
+	unregister_pernet_subsys(&proto_gre_net_ops);
+out_pernet:
 	return ret;
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 480f616..ec83536 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -888,6 +888,10 @@ static int __init nf_conntrack_proto_sctp_init(void)
 {
 	int ret;
 
+	ret = register_pernet_subsys(&sctp_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
 	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4);
 	if (ret < 0)
 		goto out_sctp4;
@@ -896,16 +900,12 @@ static int __init nf_conntrack_proto_sctp_init(void)
 	if (ret < 0)
 		goto out_sctp6;
 
-	ret = register_pernet_subsys(&sctp_net_ops);
-	if (ret < 0)
-		goto out_pernet;
-
 	return 0;
-out_pernet:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
 out_sctp6:
 	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
 out_sctp4:
+	unregister_pernet_subsys(&sctp_net_ops);
+out_pernet:
 	return ret;
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 1574895..ca969f6 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -371,6 +371,10 @@ static int __init nf_conntrack_proto_udplite_init(void)
 {
 	int ret;
 
+	ret = register_pernet_subsys(&udplite_net_ops);
+	if (ret < 0)
+		goto out_pernet;
+
 	ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4);
 	if (ret < 0)
 		goto out_udplite4;
@@ -379,16 +383,12 @@ static int __init nf_conntrack_proto_udplite_init(void)
 	if (ret < 0)
 		goto out_udplite6;
 
-	ret = register_pernet_subsys(&udplite_net_ops);
-	if (ret < 0)
-		goto out_pernet;
-
 	return 0;
-out_pernet:
-	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
 out_udplite6:
 	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
 out_udplite4:
+	unregister_pernet_subsys(&udplite_net_ops);
+out_pernet:
 	return ret;
 }
 
-- 
cgit v1.1


From bae99f7a1d372374aaf9ed8910f3b825da995b36 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 14 Mar 2013 06:03:18 +0000
Subject: netfilter: nfnetlink_queue: fix incorrect initialization of copy
 range field

2^16 = 0xffff, not 0xfffff (note the extra 'f'). Not dangerous since you
adjust it to min_t(data_len, skb->len) just after on.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_queue_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 858fd52..1cb4854 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -112,7 +112,7 @@ instance_create(u_int16_t queue_num, int portid)
 	inst->queue_num = queue_num;
 	inst->peer_portid = portid;
 	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
-	inst->copy_range = 0xfffff;
+	inst->copy_range = 0xffff;
 	inst->copy_mode = NFQNL_COPY_NONE;
 	spin_lock_init(&inst->lock);
 	INIT_LIST_HEAD(&inst->queue_list);
-- 
cgit v1.1


From a82783c91d5dce680dbd290ebf301a520b0e72a5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Mar 2013 20:11:01 +0000
Subject: netfilter: ip6t_NPT: restrict to mangle table

As the translation is stateless, using it in nat table
doesn't work (only initial packet is translated).
filter table OUTPUT works but won't re-route the packet after translation.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/ip6t_NPT.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index 83acc14..33608c6 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -114,6 +114,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
 static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
 	{
 		.name		= "SNPT",
+		.table		= "mangle",
 		.target		= ip6t_snpt_tg,
 		.targetsize	= sizeof(struct ip6t_npt_tginfo),
 		.checkentry	= ip6t_npt_checkentry,
@@ -124,6 +125,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
 	},
 	{
 		.name		= "DNPT",
+		.table		= "mangle",
 		.target		= ip6t_dnpt_tg,
 		.targetsize	= sizeof(struct ip6t_npt_tginfo),
 		.checkentry	= ip6t_npt_checkentry,
-- 
cgit v1.1


From 8c6216d7f118a128678270824b6a1286a63863ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Wed, 13 Mar 2013 02:37:49 +0000
Subject: Revert "ip_gre: make ipgre_tunnel_xmit() not parse network header as
 IP unconditionally"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 412ed94744d16806fbec3bd250fd94e71cde5a1f.

The commit is wrong as tiph points to the outer IPv4 header which is
installed at ipgre_header() and not the inner one which is protocol dependant.

This commit broke succesfully opennhrp which use PF_PACKET socket with
ETH_P_NHRP protocol. Additionally ssl_addr is set to the link-layer
IPv4 address. This address is written by ipgre_header() to the skb
earlier, and this is the IPv4 header tiph should point to - regardless
of the inner protocol payload.

Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d0ef0e6..91d66db 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -798,10 +798,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
 	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
 		gre_hlen = 0;
-		if (skb->protocol == htons(ETH_P_IP))
-			tiph = (const struct iphdr *)skb->data;
-		else
-			tiph = &tunnel->parms.iph;
+		tiph = (const struct iphdr *)skb->data;
 	} else {
 		gre_hlen = tunnel->hlen;
 		tiph = &tunnel->parms.iph;
-- 
cgit v1.1


From a5b8db91442fce9c9713fcd656c3698f1adde1d6 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Wed, 13 Mar 2013 04:18:58 +0000
Subject: rtnetlink: Mask the rta_type when range checking

Range/validity checks on rta_type in rtnetlink_rcv_msg() do
not account for flags that may be set.  This causes the function
to return -EINVAL when flags are set on the type (for example
NLA_F_NESTED).

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a585d45..5fb8d7e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2621,7 +2621,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
 
 		while (RTA_OK(attr, attrlen)) {
-			unsigned int flavor = attr->rta_type;
+			unsigned int flavor = attr->rta_type & NLA_TYPE_MASK;
 			if (flavor) {
 				if (flavor > rta_max[sz_idx])
 					return -EINVAL;
-- 
cgit v1.1


From 3d84fa98aca7f05f7010022bc45acb1b50326332 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 15 Mar 2013 06:39:12 +0000
Subject: bridge: Add support for setting BR_ROOT_BLOCK flag.

Most of the support was already there.  The only thing that was missing
was the call to set the flag.  Add this call.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index db12a0f..299fc5f 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -330,6 +330,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 	br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
 	br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
 	br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE);
+	br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
 
 	if (tb[IFLA_BRPORT_COST]) {
 		err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
-- 
cgit v1.1


From 92f28d973cce45ef5823209aab3138eb45d8b349 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 15 Mar 2013 01:03:33 -0700
Subject: scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids.

Don't allow spoofing pids over unix domain sockets in the corner
cases where a user has created a user namespace but has not yet
created a pid namespace.

Cc: stable@vger.kernel.org
Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 net/core/scm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/scm.c b/net/core/scm.c
index 905dcc6..2dc6cda 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/security.h>
+#include <linux/pid_namespace.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
@@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds)
 	if (!uid_valid(uid) || !gid_valid(gid))
 		return -EINVAL;
 
-	if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == task_tgid_vnr(current) ||
+	     ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
 	    ((uid_eq(uid, cred->uid)   || uid_eq(uid, cred->euid) ||
 	      uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
 	    ((gid_eq(gid, cred->gid)   || gid_eq(gid, cred->egid) ||
-- 
cgit v1.1


From 0d4f0608619de59fd8169dd8e72aadc28d80e715 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Mar 2013 07:01:28 +0000
Subject: tcp: dont handle MTU reduction on LISTEN socket

When an ICMP ICMP_FRAG_NEEDED (or ICMPV6_PKT_TOOBIG) message finds a
LISTEN socket, and this socket is currently owned by the user, we
set TCP_MTU_REDUCED_DEFERRED flag in listener tsq_flags.

This is bad because if we clone the parent before it had a chance to
clear the flag, the child inherits the tsq_flags value, and next
tcp_release_cb() on the child will decrement sk_refcnt.

Result is that we might free a live TCP socket, as reported by
Dormando.

IPv4: Attempt to release TCP socket in state 1

Fix this issue by testing sk_state against TCP_LISTEN early, so that we
set TCP_MTU_REDUCED_DEFERRED on appropriate sockets (not a LISTEN one)

This bug was introduced in commit 563d34d05786
(tcp: dont drop MTU reduction indications)

Reported-by: dormando <dormando@rydia.net>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 14 +++++++-------
 net/ipv6/tcp_ipv6.c |  7 +++++++
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4a8ec45..d09203c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
 	struct inet_sock *inet = inet_sk(sk);
 	u32 mtu = tcp_sk(sk)->mtu_info;
 
-	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
-	 * send out by Linux are always <576bytes so they should go through
-	 * unfragmented).
-	 */
-	if (sk->sk_state == TCP_LISTEN)
-		return;
-
 	dst = inet_csk_update_pmtu(sk, mtu);
 	if (!dst)
 		return;
@@ -408,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 			goto out;
 
 		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+			/* We are not interested in TCP_LISTEN and open_requests
+			 * (SYN-ACKs send out by Linux are always <576bytes so
+			 * they should go through unfragmented).
+			 */
+			if (sk->sk_state == TCP_LISTEN)
+				goto out;
+
 			tp->mtu_info = info;
 			if (!sock_owned_by_user(sk)) {
 				tcp_v4_mtu_reduced(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9b64600..f6d629f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -389,6 +389,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	}
 
 	if (type == ICMPV6_PKT_TOOBIG) {
+		/* We are not interested in TCP_LISTEN and open_requests
+		 * (SYN-ACKs send out by Linux are always <576bytes so
+		 * they should go through unfragmented).
+		 */
+		if (sk->sk_state == TCP_LISTEN)
+			goto out;
+
 		tp->mtu_info = ntohl(info);
 		if (!sock_owned_by_user(sk))
 			tcp_v6_mtu_reduced(sk);
-- 
cgit v1.1


From cf2e39429c245245db889fffdfbdf3f889a6cb22 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 9 Mar 2013 23:25:06 +0200
Subject: ipvs: fix sctp chunk length order

Fix wrong but non-fatal access to chunk length.
sch->length should be in network order, next chunk should
be aligned to 4 bytes. Problem noticed in sparse output.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index ae8ec6f..cd1d729 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -906,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	sctp_chunkhdr_t _sctpch, *sch;
 	unsigned char chunk_type;
 	int event, next_state;
-	int ihl;
+	int ihl, cofs;
 
 #ifdef CONFIG_IP_VS_IPV6
 	ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
@@ -914,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	ihl = ip_hdrlen(skb);
 #endif
 
-	sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t),
-				sizeof(_sctpch), &_sctpch);
+	cofs = ihl + sizeof(sctp_sctphdr_t);
+	sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
 	if (sch == NULL)
 		return;
 
@@ -933,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 	 */
 	if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
 	    (sch->type == SCTP_CID_COOKIE_ACK)) {
-		sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) +
-				sch->length), sizeof(_sctpch), &_sctpch);
-		if (sch) {
-			if (sch->type == SCTP_CID_ABORT)
+		int clen = ntohs(sch->length);
+
+		if (clen >= sizeof(sctp_chunkhdr_t)) {
+			sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
+						 sizeof(_sctpch), &_sctpch);
+			if (sch && sch->type == SCTP_CID_ABORT)
 				chunk_type = sch->type;
 		}
 	}
-- 
cgit v1.1


From 0c12582fbcdea0cbb0dfd224e1c5f9a8428ffa18 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 9 Mar 2013 23:25:04 +0200
Subject: ipvs: add backup_only flag to avoid loops

Dmitry Akindinov is reporting for a problem where SYNs are looping
between the master and backup server when the backup server is used as
real server in DR mode and has IPVS rules to function as director.

Even when the backup function is enabled we continue to forward
traffic and schedule new connections when the current master is using
the backup server as real server. While this is not a problem for NAT,
for DR and TUN method the backup server can not determine if a request
comes from client or from director.

To avoid such loops add new sysctl flag backup_only. It can be needed
for DR/TUN setups that do not need backup and director function at the
same time. When the backup function is enabled we stop any forwarding
and pass the traffic to the local stack (real server mode). The flag
disables the director function when the backup function is enabled.

For setups that enable backup function for some virtual services and
director function for other virtual services there should be another
more complex solution to support DR/TUN mode, may be to assign
per-virtual service syncid value, so that we can differentiate the
requests.

Reported-by: Dmitry Akindinov <dimak@stalker.com>
Tested-by: German Myzovsky <lawyer@sipnet.ru>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c | 12 ++++++++----
 net/netfilter/ipvs/ip_vs_ctl.c  |  7 +++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 47edf5a..18b4bc5 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1577,7 +1577,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 	/* ipvs enabled in this netns ? */
 	net = skb_net(skb);
-	if (!net_ipvs(net)->enable)
+	ipvs = net_ipvs(net);
+	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
 	ip_vs_fill_iph_skb(af, skb, &iph);
@@ -1654,7 +1655,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		/* the destination server is not available */
@@ -1815,13 +1815,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 {
 	int r;
 	struct net *net;
+	struct netns_ipvs *ipvs;
 
 	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
 		return NF_ACCEPT;
 
 	/* ipvs enabled in this netns ? */
 	net = skb_net(skb);
-	if (!net_ipvs(net)->enable)
+	ipvs = net_ipvs(net);
+	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
 	return ip_vs_in_icmp(skb, &r, hooknum);
@@ -1835,6 +1837,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
 {
 	int r;
 	struct net *net;
+	struct netns_ipvs *ipvs;
 	struct ip_vs_iphdr iphdr;
 
 	ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
@@ -1843,7 +1846,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
 
 	/* ipvs enabled in this netns ? */
 	net = skb_net(skb);
-	if (!net_ipvs(net)->enable)
+	ipvs = net_ipvs(net);
+	if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
 		return NF_ACCEPT;
 
 	return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c68198b..9e2d1cc 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1808,6 +1808,12 @@ static struct ctl_table vs_vars[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "backup_only",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_IP_VS_DEBUG
 	{
 		.procname	= "debug_level",
@@ -3741,6 +3747,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
 	ipvs->sysctl_pmtu_disc = 1;
 	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
+	tbl[idx++].data = &ipvs->sysctl_backup_only;
 
 
 	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
-- 
cgit v1.1


From bf93ad72cd8cfabe66a7b3d66236a1266d357189 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 9 Mar 2013 23:25:05 +0200
Subject: ipvs: remove extra rcu lock

In 3.7 we added code that uses ipv4_update_pmtu but after commit
c5ae7d4192 (ipv4: must use rcu protection while calling fib_lookup)
the RCU lock is not needed.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 18b4bc5..61f49d2 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1394,10 +1394,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 			skb_reset_network_header(skb);
 			IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
 				&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
-			rcu_read_lock();
 			ipv4_update_pmtu(skb, dev_net(skb->dev),
 					 mtu, 0, 0, 0, 0);
-			rcu_read_unlock();
 			/* Client uses PMTUD? */
 			if (!(cih->frag_off & htons(IP_DF)))
 				goto ignore_ipip;
-- 
cgit v1.1


From 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Fri, 15 Mar 2013 11:32:30 +0000
Subject: inet: limit length of fragment queue hash table bucket lists

This patch introduces a constant limit of the fragment queue hash
table bucket list lengths. Currently the limit 128 is choosen somewhat
arbitrary and just ensures that we can fill up the fragment cache with
empty packets up to the default ip_frag_high_thresh limits. It should
just protect from list iteration eating considerable amounts of cpu.

If we reach the maximum length in one hash bucket a warning is printed.
This is implemented on the caller side of inet_frag_find to distinguish
between the different users of inet_fragment.c.

I dropped the out of memory warning in the ipv4 fragment lookup path,
because we already get a warning by the slab allocator.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jesper Dangaard Brouer <jbrouer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c                | 20 +++++++++++++++++++-
 net/ipv4/ip_fragment.c                  | 11 ++++-------
 net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++------
 net/ipv6/reassembly.c                   |  8 ++++++--
 4 files changed, 35 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 245ae078a..f4fd23d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -21,6 +21,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
+#include <net/sock.h>
 #include <net/inet_frag.h>
 
 static void inet_frag_secret_rebuild(unsigned long dummy)
@@ -277,6 +278,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	__releases(&f->lock)
 {
 	struct inet_frag_queue *q;
+	int depth = 0;
 
 	hlist_for_each_entry(q, &f->hash[hash], list) {
 		if (q->net == nf && f->match(q, key)) {
@@ -284,9 +286,25 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 			read_unlock(&f->lock);
 			return q;
 		}
+		depth++;
 	}
 	read_unlock(&f->lock);
 
-	return inet_frag_create(nf, f, key);
+	if (depth <= INETFRAGS_MAXDEPTH)
+		return inet_frag_create(nf, f, key);
+	else
+		return ERR_PTR(-ENOBUFS);
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+				   const char *prefix)
+{
+	static const char msg[] = "inet_frag_find: Fragment hash bucket"
+		" list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
+		". Dropping fragment.\n";
+
+	if (PTR_ERR(q) == -ENOBUFS)
+		LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg);
+}
+EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6d30ac..a6445b8 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -292,14 +292,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
-	if (q == NULL)
-		goto out_nomem;
-
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
+		return NULL;
+	}
 	return container_of(q, struct ipq, q);
-
-out_nomem:
-	LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n"));
-	return NULL;
 }
 
 /* Is the fragment too far ahead to be part of ipq? */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 54087e9..6700069 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -14,6 +14,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "IPv6-nf: " fmt
+
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -180,13 +182,11 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 
 	q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
 	local_bh_enable();
-	if (q == NULL)
-		goto oom;
-
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
+		return NULL;
+	}
 	return container_of(q, struct frag_queue, q);
-
-oom:
-	return NULL;
 }
 
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3c6a772..196ab93 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -26,6 +26,9 @@
  *	YOSHIFUJI,H. @USAGI	Always remove fragment header to
  *				calculate ICV correctly.
  */
+
+#define pr_fmt(fmt) "IPv6: " fmt
+
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -185,9 +188,10 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6
 	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
 
 	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
-	if (q == NULL)
+	if (IS_ERR_OR_NULL(q)) {
+		inet_frag_maybe_warn_overflow(q, pr_fmt());
 		return NULL;
-
+	}
 	return container_of(q, struct frag_queue, q);
 }
 
-- 
cgit v1.1


From 3dd6664fac7e6041bfc8756ae9e8c78f59108cd9 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Tue, 19 Mar 2013 13:09:59 +0000
Subject: netfilter: remove unused "config IP_NF_QUEUE"

Kconfig symbol IP_NF_QUEUE is unused since commit
d16cf20e2f2f13411eece7f7fb72c17d141c4a84 ("netfilter: remove ip_queue
support"). Let's remove it too.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ce2d43e..0d755c5 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT
 
 	  If unsure, say Y.
 
-config IP_NF_QUEUE
-	tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
-	depends on NETFILTER_ADVANCED
-	help
-	  Netfilter has the ability to queue packets to user space: the
-	  netlink device can be used to access them using this driver.
-
-	  This option enables the old IPv4-only "ip_queue" implementation
-	  which has been obsoleted by the new "nfnetlink_queue" code (see
-	  CONFIG_NETFILTER_NETLINK_QUEUE).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP_NF_IPTABLES
 	tristate "IP tables support (required for filtering/masq/NAT)"
 	default m if NETFILTER_ADVANCED=n
-- 
cgit v1.1


From f1e79e208076ffe7bad97158275f1c572c04f5c7 Mon Sep 17 00:00:00 2001
From: Masatake YAMATO <yamato@redhat.com>
Date: Tue, 19 Mar 2013 01:47:27 +0000
Subject: genetlink: trigger BUG_ON if a group name is too long

Trigger BUG_ON if a group name is longer than GENL_NAMSIZ.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/genetlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f2aabb6..5a55be3 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -142,6 +142,7 @@ int genl_register_mc_group(struct genl_family *family,
 	int err = 0;
 
 	BUG_ON(grp->name[0] == '\0');
+	BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL);
 
 	genl_lock();
 
-- 
cgit v1.1


From 44046a593eb770dbecdabf1c82bcd252f2a8337b Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:12 +0000
Subject: udp: add encap_destroy callback

Users of udp encapsulation currently have an encap_rcv callback which they can
use to hook into the udp receive path.

In situations where a encapsulation user allocates resources associated with a
udp encap socket, it may be convenient to be able to also hook the proto
.destroy operation.  For example, if an encap user holds a reference to the
udp socket, the destroy hook might be used to relinquish this reference.

This patch adds a socket destroy hook into udp, which is set and enabled
in the same way as the existing encap_rcv hook.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 7 +++++++
 net/ipv6/udp.c | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 265c42c..0a073a2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1762,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb)
 
 void udp_destroy_sock(struct sock *sk)
 {
+	struct udp_sock *up = udp_sk(sk);
 	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
 	unlock_sock_fast(sk, slow);
+	if (static_key_false(&udp_encap_needed) && up->encap_type) {
+		void (*encap_destroy)(struct sock *sk);
+		encap_destroy = ACCESS_ONCE(up->encap_destroy);
+		if (encap_destroy)
+			encap_destroy(sk);
+	}
 }
 
 /*
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 599e1ba6..d8e5e85 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1285,10 +1285,18 @@ do_confirm:
 
 void udpv6_destroy_sock(struct sock *sk)
 {
+	struct udp_sock *up = udp_sk(sk);
 	lock_sock(sk);
 	udp_v6_flush_pending_frames(sk);
 	release_sock(sk);
 
+	if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+		void (*encap_destroy)(struct sock *sk);
+		encap_destroy = ACCESS_ONCE(up->encap_destroy);
+		if (encap_destroy)
+			encap_destroy(sk);
+	}
+
 	inet6_destroy_sock(sk);
 }
 
-- 
cgit v1.1


From 9980d001cec86c3c75f3a6008ddb73c397ea3b3e Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:13 +0000
Subject: l2tp: add udp encap socket destroy handler

L2TP sessions hold a reference to the tunnel socket to prevent it going away
while sessions are still active.  However, since tunnel destruction is handled
by the sock sk_destruct callback there is a catch-22: a tunnel with sessions
cannot be deleted since each session holds a reference to the tunnel socket.
If userspace closes a managed tunnel socket, or dies, the tunnel will persist
and it will be neccessary to individually delete the sessions using netlink
commands.  This is ugly.

To prevent this occuring, this patch leverages the udp encapsulation socket
destroy callback to gain early notification when the tunnel socket is closed.
This allows us to safely close the sessions running in the tunnel, dropping
the tunnel socket references in the process.  The tunnel socket is then
destroyed as normal, and the tunnel resources deallocated in sk_destruct.

While we're at it, ensure that l2tp_tunnel_closeall correctly drops session
references to allow the sessions to be deleted rather than leaking.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index d36875f..ee726a7 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1282,6 +1282,7 @@ static void l2tp_tunnel_destruct(struct sock *sk)
 		/* No longer an encapsulation socket. See net/ipv4/udp.c */
 		(udp_sk(sk))->encap_type = 0;
 		(udp_sk(sk))->encap_rcv = NULL;
+		(udp_sk(sk))->encap_destroy = NULL;
 		break;
 	case L2TP_ENCAPTYPE_IP:
 		break;
@@ -1360,6 +1361,8 @@ again:
 			if (session->deref != NULL)
 				(*session->deref)(session);
 
+			l2tp_session_dec_refcount(session);
+
 			write_lock_bh(&tunnel->hlist_lock);
 
 			/* Now restart from the beginning of this hash
@@ -1373,6 +1376,16 @@ again:
 	write_unlock_bh(&tunnel->hlist_lock);
 }
 
+/* Tunnel socket destroy hook for UDP encapsulation */
+static void l2tp_udp_encap_destroy(struct sock *sk)
+{
+	struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+	if (tunnel) {
+		l2tp_tunnel_closeall(tunnel);
+		sock_put(sk);
+	}
+}
+
 /* Really kill the tunnel.
  * Come here only when all sessions have been cleared from the tunnel.
  */
@@ -1668,6 +1681,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
 		udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
 		udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+		udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
 #if IS_ENABLED(CONFIG_IPV6)
 		if (sk->sk_family == PF_INET6)
 			udpv6_encap_enable();
-- 
cgit v1.1


From e34f4c7050e5471b6d4fb25380713937fc837514 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:14 +0000
Subject: l2tp: export l2tp_tunnel_closeall

l2tp_core internally uses l2tp_tunnel_closeall to close all sessions in a
tunnel when a UDP-encapsulation socket is destroyed.  We need to do something
similar for IP-encapsulation sockets.

Export l2tp_tunnel_closeall as a GPL symbol to enable l2tp_ip and l2tp_ip6 to
call it from their .destroy handlers.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 4 ++--
 net/l2tp/l2tp_core.h | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index ee726a7..287e327 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -114,7 +114,6 @@ struct l2tp_net {
 
 static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
 static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
-static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 
 static inline struct l2tp_net *l2tp_pernet(struct net *net)
 {
@@ -1312,7 +1311,7 @@ end:
 
 /* When the tunnel is closed, all the attached sessions need to go too.
  */
-static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
 {
 	int hash;
 	struct hlist_node *walk;
@@ -1375,6 +1374,7 @@ again:
 	}
 	write_unlock_bh(&tunnel->hlist_lock);
 }
+EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
 
 /* Tunnel socket destroy hook for UDP encapsulation */
 static void l2tp_udp_encap_destroy(struct sock *sk)
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 8eb8f1d..b0861f6 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -240,6 +240,7 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
 extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
 
 extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
+extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
 extern int l2tp_session_delete(struct l2tp_session *session);
-- 
cgit v1.1


From 936063175afd895913a5e9db77e1a0ef43ea44ea Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:15 +0000
Subject: l2tp: close sessions in ip socket destroy callback

l2tp_core hooks UDP's .destroy handler to gain advance warning of a tunnel
socket being closed from userspace.  We need to do the same thing for
IP-encapsulation sockets.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c  | 6 ++++++
 net/l2tp/l2tp_ip6.c | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 7f41b70..571db8d 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout)
 static void l2tp_ip_destroy_sock(struct sock *sk)
 {
 	struct sk_buff *skb;
+	struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
 
 	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
 		kfree_skb(skb);
 
+	if (tunnel) {
+		l2tp_tunnel_closeall(tunnel);
+		sock_put(sk);
+	}
+
 	sk_refcnt_debug_dec(sk);
 }
 
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 41f2f81..c74f5a9 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -241,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout)
 
 static void l2tp_ip6_destroy_sock(struct sock *sk)
 {
+	struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+
 	lock_sock(sk);
 	ip6_flush_pending_frames(sk);
 	release_sock(sk);
 
+	if (tunnel) {
+		l2tp_tunnel_closeall(tunnel);
+		sock_put(sk);
+	}
+
 	inet6_destroy_sock(sk);
 }
 
-- 
cgit v1.1


From 2b551c6e7d5bca2c78c216b15ef675653d4f459a Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:16 +0000
Subject: l2tp: close sessions before initiating tunnel delete

When a user deletes a tunnel using netlink, all the sessions in the tunnel
should also be deleted.  Since running sessions will pin the tunnel socket
with the references they hold, have the l2tp_tunnel_delete close all sessions
in a tunnel before finally closing the tunnel socket.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 287e327..0dd50c0 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1737,6 +1737,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
  */
 int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
 {
+	l2tp_tunnel_closeall(tunnel);
 	return (false == queue_work(l2tp_wq, &tunnel->del_work));
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
-- 
cgit v1.1


From 8abbbe8ff572fd84d1b98eb9acf30611a97cf72e Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:17 +0000
Subject: l2tp: take a reference for kernel sockets in l2tp_tunnel_sock_lookup

When looking up the tunnel socket in struct l2tp_tunnel, hold a reference
whether the socket was created by the kernel or by userspace.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 0dd50c0..45373fe 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -191,6 +191,7 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel)
 	} else {
 		/* Socket is owned by kernelspace */
 		sk = tunnel->sock;
+		sock_hold(sk);
 	}
 
 out:
@@ -209,6 +210,7 @@ void l2tp_tunnel_sock_put(struct sock *sk)
 		}
 		sock_put(sk);
 	}
+	sock_put(sk);
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put);
 
-- 
cgit v1.1


From 02d13ed5f94af38c37d1abd53462fe48d78bcc9d Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:18 +0000
Subject: l2tp: don't BUG_ON sk_socket being NULL

It is valid for an existing struct sock object to have a NULL sk_socket
pointer, so don't BUG_ON in l2tp_tunnel_del_work if that should occur.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 45373fe..e841ef2 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1412,19 +1412,21 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
 		return;
 
 	sock = sk->sk_socket;
-	BUG_ON(!sock);
 
-	/* If the tunnel socket was created directly by the kernel, use the
-	 * sk_* API to release the socket now.  Otherwise go through the
-	 * inet_* layer to shut the socket down, and let userspace close it.
+	/* If the tunnel socket was created by userspace, then go through the
+	 * inet layer to shut the socket down, and let userspace close it.
+	 * Otherwise, if we created the socket directly within the kernel, use
+	 * the sk API to release it here.
 	 * In either case the tunnel resources are freed in the socket
 	 * destructor when the tunnel socket goes away.
 	 */
-	if (sock->file == NULL) {
-		kernel_sock_shutdown(sock, SHUT_RDWR);
-		sk_release_kernel(sk);
+	if (tunnel->fd >= 0) {
+		if (sock)
+			inet_shutdown(sock, 2);
 	} else {
-		inet_shutdown(sock, 2);
+		if (sock)
+			kernel_sock_shutdown(sock, SHUT_RDWR);
+		sk_release_kernel(sk);
 	}
 
 	l2tp_tunnel_sock_put(sk);
-- 
cgit v1.1


From 48f72f92b31431c40279b0fba6c5588e07e67d95 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:19 +0000
Subject: l2tp: add session reorder queue purge function to core

If an l2tp session is deleted, it is necessary to delete skbs in-flight
on the session's reorder queue before taking it down.

Rather than having each pseudowire implementation reaching into the
l2tp_session struct to handle this itself, provide a function in l2tp_core to
purge the session queue.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 17 +++++++++++++++++
 net/l2tp/l2tp_core.h |  1 +
 2 files changed, 18 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index e841ef2..69c316d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -829,6 +829,23 @@ discard:
 }
 EXPORT_SYMBOL(l2tp_recv_common);
 
+/* Drop skbs from the session's reorder_q
+ */
+int l2tp_session_queue_purge(struct l2tp_session *session)
+{
+	struct sk_buff *skb = NULL;
+	BUG_ON(!session);
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+	while ((skb = skb_dequeue(&session->reorder_q))) {
+		atomic_long_inc(&session->stats.rx_errors);
+		kfree_skb(skb);
+		if (session->deref)
+			(*session->deref)(session);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_queue_purge);
+
 /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
  * here. The skb is not on a list when we get here.
  * Returns 0 if the packet was a data packet and was successfully passed on.
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index b0861f6..d40713d 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -246,6 +246,7 @@ extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunne
 extern int l2tp_session_delete(struct l2tp_session *session);
 extern void l2tp_session_free(struct l2tp_session *session);
 extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
+extern int l2tp_session_queue_purge(struct l2tp_session *session);
 extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
 
 extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
-- 
cgit v1.1


From 4c6e2fd35460208596fa099ee0750a4b0438aa5c Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:20 +0000
Subject: l2tp: purge session reorder queue on delete

Add calls to l2tp_session_queue_purge as a part of l2tp_tunnel_closeall
and l2tp_session_delete.  Pseudowire implementations which are deleted only
via. l2tp_core l2tp_session_delete calls can dispense with their own code for
flushing the reorder queue.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 69c316d..c00f31b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1373,6 +1373,8 @@ again:
 				synchronize_rcu();
 			}
 
+			l2tp_session_queue_purge(session);
+
 			if (session->session_close != NULL)
 				(*session->session_close)(session);
 
@@ -1813,6 +1815,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_free);
  */
 int l2tp_session_delete(struct l2tp_session *session)
 {
+	l2tp_session_queue_purge(session);
+
 	if (session->session_close != NULL)
 		(*session->session_close)(session);
 
-- 
cgit v1.1


From cf2f5c886a209377daefd5d2ba0bcd49c3887813 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:21 +0000
Subject: l2tp: push all ppp pseudowire shutdown through .release handler

If userspace deletes a ppp pseudowire using the netlink API, either by
directly deleting the session or by deleting the tunnel that contains the
session, we need to tear down the corresponding pppox channel.

Rather than trying to manage two pppox unbind codepaths, switch the netlink
and l2tp_core session_close handlers to close via. the l2tp_ppp socket
.release handler.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 53 ++++++++++-------------------------------------------
 1 file changed, 10 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 6a53371..7e3e16a 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -97,6 +97,7 @@
 #include <net/ip.h>
 #include <net/udp.h>
 #include <net/xfrm.h>
+#include <net/inet_common.h>
 
 #include <asm/byteorder.h>
 #include <linux/atomic.h>
@@ -447,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session)
 {
 	struct pppol2tp_session *ps = l2tp_session_priv(session);
 	struct sock *sk = ps->sock;
-	struct sk_buff *skb;
+	struct socket *sock = sk->sk_socket;
 
 	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
-	if (session->session_id == 0)
-		goto out;
-
-	if (sk != NULL) {
-		lock_sock(sk);
-
-		if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
-			pppox_unbind_sock(sk);
-			sk->sk_state = PPPOX_DEAD;
-			sk->sk_state_change(sk);
-		}
-
-		/* Purge any queued data */
-		skb_queue_purge(&sk->sk_receive_queue);
-		skb_queue_purge(&sk->sk_write_queue);
-		while ((skb = skb_dequeue(&session->reorder_q))) {
-			kfree_skb(skb);
-			sock_put(sk);
-		}
 
-		release_sock(sk);
+	if (sock) {
+		inet_shutdown(sock, 2);
+		/* Don't let the session go away before our socket does */
+		l2tp_session_inc_refcount(session);
 	}
-
-out:
 	return;
 }
 
@@ -525,16 +508,12 @@ static int pppol2tp_release(struct socket *sock)
 	session = pppol2tp_sock_to_session(sk);
 
 	/* Purge any queued data */
-	skb_queue_purge(&sk->sk_receive_queue);
-	skb_queue_purge(&sk->sk_write_queue);
 	if (session != NULL) {
-		struct sk_buff *skb;
-		while ((skb = skb_dequeue(&session->reorder_q))) {
-			kfree_skb(skb);
-			sock_put(sk);
-		}
+		l2tp_session_queue_purge(session);
 		sock_put(sk);
 	}
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
 
 	release_sock(sk);
 
@@ -880,18 +859,6 @@ out:
 	return error;
 }
 
-/* Called when deleting sessions via the netlink interface.
- */
-static int pppol2tp_session_delete(struct l2tp_session *session)
-{
-	struct pppol2tp_session *ps = l2tp_session_priv(session);
-
-	if (ps->sock == NULL)
-		l2tp_session_dec_refcount(session);
-
-	return 0;
-}
-
 #endif /* CONFIG_L2TP_V3 */
 
 /* getname() support.
@@ -1839,7 +1806,7 @@ static const struct pppox_proto pppol2tp_proto = {
 
 static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = {
 	.session_create	= pppol2tp_session_create,
-	.session_delete	= pppol2tp_session_delete,
+	.session_delete	= l2tp_session_delete,
 };
 
 #endif /* CONFIG_L2TP_V3 */
-- 
cgit v1.1


From 7b7c0719cd7afee725b920d75ec6a500b76107e6 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:22 +0000
Subject: l2tp: avoid deadlock in l2tp stats update

l2tp's u64_stats writers were incorrectly synchronised, making it possible to
deadlock a 64bit machine running a 32bit kernel simply by sending the l2tp
code netlink commands while passing data through l2tp sessions.

Previous discussion on netdev determined that alternative solutions such as
spinlock writer synchronisation or per-cpu data would bring unjustified
overhead, given that most users interested in high volume traffic will likely
be running 64bit kernels on 64bit hardware.

As such, this patch replaces l2tp's use of u64_stats with atomic_long_t,
thereby avoiding the deadlock.

Ref:
http://marc.info/?l=linux-netdev&m=134029167910731&w=2
http://marc.info/?l=linux-netdev&m=134079868111131&w=2

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c    | 75 +++++++++++++------------------------------------
 net/l2tp/l2tp_core.h    | 19 ++++++-------
 net/l2tp/l2tp_debugfs.c | 28 +++++++++---------
 net/l2tp/l2tp_netlink.c | 72 ++++++++++++++++++-----------------------------
 net/l2tp/l2tp_ppp.c     | 46 +++++++++++++++---------------
 5 files changed, 93 insertions(+), 147 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index c00f31b..97d30ac 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -374,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk
 	struct sk_buff *skbp;
 	struct sk_buff *tmp;
 	u32 ns = L2TP_SKB_CB(skb)->ns;
-	struct l2tp_stats *sstats;
 
 	spin_lock_bh(&session->reorder_q.lock);
-	sstats = &session->stats;
 	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
 		if (L2TP_SKB_CB(skbp)->ns > ns) {
 			__skb_queue_before(&session->reorder_q, skbp, skb);
@@ -385,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk
 				 "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
 				 session->name, ns, L2TP_SKB_CB(skbp)->ns,
 				 skb_queue_len(&session->reorder_q));
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_oos_packets++;
-			u64_stats_update_end(&sstats->syncp);
+			atomic_long_inc(&session->stats.rx_oos_packets);
 			goto out;
 		}
 	}
@@ -404,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
 {
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	int length = L2TP_SKB_CB(skb)->length;
-	struct l2tp_stats *tstats, *sstats;
 
 	/* We're about to requeue the skb, so return resources
 	 * to its current owner (a socket receive buffer).
 	 */
 	skb_orphan(skb);
 
-	tstats = &tunnel->stats;
-	u64_stats_update_begin(&tstats->syncp);
-	sstats = &session->stats;
-	u64_stats_update_begin(&sstats->syncp);
-	tstats->rx_packets++;
-	tstats->rx_bytes += length;
-	sstats->rx_packets++;
-	sstats->rx_bytes += length;
-	u64_stats_update_end(&tstats->syncp);
-	u64_stats_update_end(&sstats->syncp);
+	atomic_long_inc(&tunnel->stats.rx_packets);
+	atomic_long_add(length, &tunnel->stats.rx_bytes);
+	atomic_long_inc(&session->stats.rx_packets);
+	atomic_long_add(length, &session->stats.rx_bytes);
 
 	if (L2TP_SKB_CB(skb)->has_seq) {
 		/* Bump our Nr */
@@ -451,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
 {
 	struct sk_buff *skb;
 	struct sk_buff *tmp;
-	struct l2tp_stats *sstats;
 
 	/* If the pkt at the head of the queue has the nr that we
 	 * expect to send up next, dequeue it and any other
@@ -459,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
 	 */
 start:
 	spin_lock_bh(&session->reorder_q.lock);
-	sstats = &session->stats;
 	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
 		if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_seq_discards++;
-			sstats->rx_errors++;
-			u64_stats_update_end(&sstats->syncp);
+			atomic_long_inc(&session->stats.rx_seq_discards);
+			atomic_long_inc(&session->stats.rx_errors);
 			l2tp_dbg(session, L2TP_MSG_SEQ,
 				 "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n",
 				 session->name, L2TP_SKB_CB(skb)->ns,
@@ -624,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	int offset;
 	u32 ns, nr;
-	struct l2tp_stats *sstats = &session->stats;
 
 	/* The ref count is increased since we now hold a pointer to
 	 * the session. Take care to decrement the refcnt when exiting
@@ -641,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 				  "%s: cookie mismatch (%u/%u). Discarding.\n",
 				  tunnel->name, tunnel->tunnel_id,
 				  session->session_id);
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_cookie_discards++;
-			u64_stats_update_end(&sstats->syncp);
+			atomic_long_inc(&session->stats.rx_cookie_discards);
 			goto discard;
 		}
 		ptr += session->peer_cookie_len;
@@ -712,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			l2tp_warn(session, L2TP_MSG_SEQ,
 				  "%s: recv data has no seq numbers when required. Discarding.\n",
 				  session->name);
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_seq_discards++;
-			u64_stats_update_end(&sstats->syncp);
+			atomic_long_inc(&session->stats.rx_seq_discards);
 			goto discard;
 		}
 
@@ -733,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			l2tp_warn(session, L2TP_MSG_SEQ,
 				  "%s: recv data has no seq numbers when required. Discarding.\n",
 				  session->name);
-			u64_stats_update_begin(&sstats->syncp);
-			sstats->rx_seq_discards++;
-			u64_stats_update_end(&sstats->syncp);
+			atomic_long_inc(&session->stats.rx_seq_discards);
 			goto discard;
 		}
 	}
@@ -789,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			 * packets
 			 */
 			if (L2TP_SKB_CB(skb)->ns != session->nr) {
-				u64_stats_update_begin(&sstats->syncp);
-				sstats->rx_seq_discards++;
-				u64_stats_update_end(&sstats->syncp);
+				atomic_long_inc(&session->stats.rx_seq_discards);
 				l2tp_dbg(session, L2TP_MSG_SEQ,
 					 "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n",
 					 session->name, L2TP_SKB_CB(skb)->ns,
@@ -817,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 	return;
 
 discard:
-	u64_stats_update_begin(&sstats->syncp);
-	sstats->rx_errors++;
-	u64_stats_update_end(&sstats->syncp);
+	atomic_long_inc(&session->stats.rx_errors);
 	kfree_skb(skb);
 
 	if (session->deref)
@@ -861,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 	u32 tunnel_id, session_id;
 	u16 version;
 	int length;
-	struct l2tp_stats *tstats;
 
 	if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
 		goto discard_bad_csum;
@@ -950,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 discard_bad_csum:
 	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
 	UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
-	tstats = &tunnel->stats;
-	u64_stats_update_begin(&tstats->syncp);
-	tstats->rx_errors++;
-	u64_stats_update_end(&tstats->syncp);
+	atomic_long_inc(&tunnel->stats.rx_errors);
 	kfree_skb(skb);
 
 	return 0;
@@ -1080,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	unsigned int len = skb->len;
 	int error;
-	struct l2tp_stats *tstats, *sstats;
 
 	/* Debug */
 	if (session->send_seq)
@@ -1109,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 		error = ip_queue_xmit(skb, fl);
 
 	/* Update stats */
-	tstats = &tunnel->stats;
-	u64_stats_update_begin(&tstats->syncp);
-	sstats = &session->stats;
-	u64_stats_update_begin(&sstats->syncp);
 	if (error >= 0) {
-		tstats->tx_packets++;
-		tstats->tx_bytes += len;
-		sstats->tx_packets++;
-		sstats->tx_bytes += len;
+		atomic_long_inc(&tunnel->stats.tx_packets);
+		atomic_long_add(len, &tunnel->stats.tx_bytes);
+		atomic_long_inc(&session->stats.tx_packets);
+		atomic_long_add(len, &session->stats.tx_bytes);
 	} else {
-		tstats->tx_errors++;
-		sstats->tx_errors++;
+		atomic_long_inc(&tunnel->stats.tx_errors);
+		atomic_long_inc(&session->stats.tx_errors);
 	}
-	u64_stats_update_end(&tstats->syncp);
-	u64_stats_update_end(&sstats->syncp);
 
 	return 0;
 }
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index d40713d..519b013 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -36,16 +36,15 @@ enum {
 struct sk_buff;
 
 struct l2tp_stats {
-	u64			tx_packets;
-	u64			tx_bytes;
-	u64			tx_errors;
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			rx_seq_discards;
-	u64			rx_oos_packets;
-	u64			rx_errors;
-	u64			rx_cookie_discards;
-	struct u64_stats_sync	syncp;
+	atomic_long_t		tx_packets;
+	atomic_long_t		tx_bytes;
+	atomic_long_t		tx_errors;
+	atomic_long_t		rx_packets;
+	atomic_long_t		rx_bytes;
+	atomic_long_t		rx_seq_discards;
+	atomic_long_t		rx_oos_packets;
+	atomic_long_t		rx_errors;
+	atomic_long_t		rx_cookie_discards;
 };
 
 struct l2tp_tunnel;
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index c3813bc..072d720 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 		   tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
 		   atomic_read(&tunnel->ref_count));
 
-	seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+	seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n",
 		   tunnel->debug,
-		   (unsigned long long)tunnel->stats.tx_packets,
-		   (unsigned long long)tunnel->stats.tx_bytes,
-		   (unsigned long long)tunnel->stats.tx_errors,
-		   (unsigned long long)tunnel->stats.rx_packets,
-		   (unsigned long long)tunnel->stats.rx_bytes,
-		   (unsigned long long)tunnel->stats.rx_errors);
+		   atomic_long_read(&tunnel->stats.tx_packets),
+		   atomic_long_read(&tunnel->stats.tx_bytes),
+		   atomic_long_read(&tunnel->stats.tx_errors),
+		   atomic_long_read(&tunnel->stats.rx_packets),
+		   atomic_long_read(&tunnel->stats.rx_bytes),
+		   atomic_long_read(&tunnel->stats.rx_errors));
 
 	if (tunnel->show != NULL)
 		tunnel->show(m, tunnel);
@@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 		seq_printf(m, "\n");
 	}
 
-	seq_printf(m, "   %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+	seq_printf(m, "   %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n",
 		   session->nr, session->ns,
-		   (unsigned long long)session->stats.tx_packets,
-		   (unsigned long long)session->stats.tx_bytes,
-		   (unsigned long long)session->stats.tx_errors,
-		   (unsigned long long)session->stats.rx_packets,
-		   (unsigned long long)session->stats.rx_bytes,
-		   (unsigned long long)session->stats.rx_errors);
+		   atomic_long_read(&session->stats.tx_packets),
+		   atomic_long_read(&session->stats.tx_bytes),
+		   atomic_long_read(&session->stats.tx_errors),
+		   atomic_long_read(&session->stats.rx_packets),
+		   atomic_long_read(&session->stats.rx_bytes),
+		   atomic_long_read(&session->stats.rx_errors));
 
 	if (session->show != NULL)
 		session->show(m, session);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index c1bab22..0825ff2 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -246,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 #if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6_pinfo *np = NULL;
 #endif
-	struct l2tp_stats stats;
-	unsigned int start;
 
 	hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags,
 			  L2TP_CMD_TUNNEL_GET);
@@ -265,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	do {
-		start = u64_stats_fetch_begin(&tunnel->stats.syncp);
-		stats.tx_packets = tunnel->stats.tx_packets;
-		stats.tx_bytes = tunnel->stats.tx_bytes;
-		stats.tx_errors = tunnel->stats.tx_errors;
-		stats.rx_packets = tunnel->stats.rx_packets;
-		stats.rx_bytes = tunnel->stats.rx_bytes;
-		stats.rx_errors = tunnel->stats.rx_errors;
-		stats.rx_seq_discards = tunnel->stats.rx_seq_discards;
-		stats.rx_oos_packets = tunnel->stats.rx_oos_packets;
-	} while (u64_stats_fetch_retry(&tunnel->stats.syncp, start));
-
-	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) ||
+	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS,
+		    atomic_long_read(&tunnel->stats.tx_packets)) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES,
+		    atomic_long_read(&tunnel->stats.tx_bytes)) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS,
+		    atomic_long_read(&tunnel->stats.tx_errors)) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS,
+		    atomic_long_read(&tunnel->stats.rx_packets)) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES,
+		    atomic_long_read(&tunnel->stats.rx_bytes)) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS,
-			stats.rx_seq_discards) ||
+		    atomic_long_read(&tunnel->stats.rx_seq_discards)) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS,
-			stats.rx_oos_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors))
+		    atomic_long_read(&tunnel->stats.rx_oos_packets)) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS,
+		    atomic_long_read(&tunnel->stats.rx_errors)))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 
@@ -612,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 	struct nlattr *nest;
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	struct sock *sk = NULL;
-	struct l2tp_stats stats;
-	unsigned int start;
 
 	sk = tunnel->sock;
 
@@ -656,28 +646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	do {
-		start = u64_stats_fetch_begin(&session->stats.syncp);
-		stats.tx_packets = session->stats.tx_packets;
-		stats.tx_bytes = session->stats.tx_bytes;
-		stats.tx_errors = session->stats.tx_errors;
-		stats.rx_packets = session->stats.rx_packets;
-		stats.rx_bytes = session->stats.rx_bytes;
-		stats.rx_errors = session->stats.rx_errors;
-		stats.rx_seq_discards = session->stats.rx_seq_discards;
-		stats.rx_oos_packets = session->stats.rx_oos_packets;
-	} while (u64_stats_fetch_retry(&session->stats.syncp, start));
-
-	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) ||
-	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) ||
+	if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS,
+		atomic_long_read(&session->stats.tx_packets)) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_BYTES,
+		atomic_long_read(&session->stats.tx_bytes)) ||
+	    nla_put_u64(skb, L2TP_ATTR_TX_ERRORS,
+		atomic_long_read(&session->stats.tx_errors)) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_PACKETS,
+		atomic_long_read(&session->stats.rx_packets)) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_BYTES,
+		atomic_long_read(&session->stats.rx_bytes)) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS,
-			stats.rx_seq_discards) ||
+		atomic_long_read(&session->stats.rx_seq_discards)) ||
 	    nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS,
-			stats.rx_oos_packets) ||
-	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors))
+		atomic_long_read(&session->stats.rx_oos_packets)) ||
+	    nla_put_u64(skb, L2TP_ATTR_RX_ERRORS,
+		atomic_long_read(&session->stats.rx_errors)))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 7e3e16a..9d0eb8c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -260,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
 			  session->name);
 
 		/* Not bound. Nothing we can do, so discard. */
-		session->stats.rx_errors++;
+		atomic_long_inc(&session->stats.rx_errors);
 		kfree_skb(skb);
 	}
 
@@ -992,14 +992,14 @@ end:
 static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
 				struct l2tp_stats *stats)
 {
-	dest->tx_packets = stats->tx_packets;
-	dest->tx_bytes = stats->tx_bytes;
-	dest->tx_errors = stats->tx_errors;
-	dest->rx_packets = stats->rx_packets;
-	dest->rx_bytes = stats->rx_bytes;
-	dest->rx_seq_discards = stats->rx_seq_discards;
-	dest->rx_oos_packets = stats->rx_oos_packets;
-	dest->rx_errors = stats->rx_errors;
+	dest->tx_packets = atomic_long_read(&stats->tx_packets);
+	dest->tx_bytes = atomic_long_read(&stats->tx_bytes);
+	dest->tx_errors = atomic_long_read(&stats->tx_errors);
+	dest->rx_packets = atomic_long_read(&stats->rx_packets);
+	dest->rx_bytes = atomic_long_read(&stats->rx_bytes);
+	dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards);
+	dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets);
+	dest->rx_errors = atomic_long_read(&stats->rx_errors);
 }
 
 /* Session ioctl helper.
@@ -1633,14 +1633,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
 		   tunnel->name,
 		   (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
 		   atomic_read(&tunnel->ref_count) - 1);
-	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
+	seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n",
 		   tunnel->debug,
-		   (unsigned long long)tunnel->stats.tx_packets,
-		   (unsigned long long)tunnel->stats.tx_bytes,
-		   (unsigned long long)tunnel->stats.tx_errors,
-		   (unsigned long long)tunnel->stats.rx_packets,
-		   (unsigned long long)tunnel->stats.rx_bytes,
-		   (unsigned long long)tunnel->stats.rx_errors);
+		   atomic_long_read(&tunnel->stats.tx_packets),
+		   atomic_long_read(&tunnel->stats.tx_bytes),
+		   atomic_long_read(&tunnel->stats.tx_errors),
+		   atomic_long_read(&tunnel->stats.rx_packets),
+		   atomic_long_read(&tunnel->stats.rx_bytes),
+		   atomic_long_read(&tunnel->stats.rx_errors));
 }
 
 static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
@@ -1675,14 +1675,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		   session->lns_mode ? "LNS" : "LAC",
 		   session->debug,
 		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
+	seq_printf(m, "   %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n",
 		   session->nr, session->ns,
-		   (unsigned long long)session->stats.tx_packets,
-		   (unsigned long long)session->stats.tx_bytes,
-		   (unsigned long long)session->stats.tx_errors,
-		   (unsigned long long)session->stats.rx_packets,
-		   (unsigned long long)session->stats.rx_bytes,
-		   (unsigned long long)session->stats.rx_errors);
+		   atomic_long_read(&session->stats.tx_packets),
+		   atomic_long_read(&session->stats.tx_bytes),
+		   atomic_long_read(&session->stats.tx_errors),
+		   atomic_long_read(&session->stats.rx_packets),
+		   atomic_long_read(&session->stats.rx_bytes),
+		   atomic_long_read(&session->stats.rx_errors));
 
 	if (po)
 		seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
-- 
cgit v1.1


From f6e16b299bacaa71c6604a784f2d088a966f8c23 Mon Sep 17 00:00:00 2001
From: Tom Parkin <tparkin@katalix.com>
Date: Tue, 19 Mar 2013 06:11:23 +0000
Subject: l2tp: unhash l2tp sessions on delete, not on free

If we postpone unhashing of l2tp sessions until the structure is freed, we
risk:

 1. further packets arriving and getting queued while the pseudowire is being
    closed down
 2. the recv path hitting "scheduling while atomic" errors in the case that
    recv drops the last reference to a session and calls l2tp_session_free
    while in atomic context

As such, l2tp sessions should be unhashed from l2tp_core data structures early
in the teardown process prior to calling pseudowire close.  For pseudowires
like l2tp_ppp which have multiple shutdown codepaths, provide an unhash hook.

Signed-off-by: Tom Parkin <tparkin@katalix.com>
Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 75 ++++++++++++++++++++++++----------------------------
 net/l2tp/l2tp_core.h |  1 +
 net/l2tp/l2tp_ppp.c  | 12 +++------
 3 files changed, 38 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 97d30ac..8aecf5d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1316,26 +1316,12 @@ again:
 
 			hlist_del_init(&session->hlist);
 
-			/* Since we should hold the sock lock while
-			 * doing any unbinding, we need to release the
-			 * lock we're holding before taking that lock.
-			 * Hold a reference to the sock so it doesn't
-			 * disappear as we're jumping between locks.
-			 */
 			if (session->ref != NULL)
 				(*session->ref)(session);
 
 			write_unlock_bh(&tunnel->hlist_lock);
 
-			if (tunnel->version != L2TP_HDR_VER_2) {
-				struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
-				spin_lock_bh(&pn->l2tp_session_hlist_lock);
-				hlist_del_init_rcu(&session->global_hlist);
-				spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-				synchronize_rcu();
-			}
-
+			__l2tp_session_unhash(session);
 			l2tp_session_queue_purge(session);
 
 			if (session->session_close != NULL)
@@ -1732,64 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
  */
 void l2tp_session_free(struct l2tp_session *session)
 {
-	struct l2tp_tunnel *tunnel;
+	struct l2tp_tunnel *tunnel = session->tunnel;
 
 	BUG_ON(atomic_read(&session->ref_count) != 0);
 
-	tunnel = session->tunnel;
-	if (tunnel != NULL) {
+	if (tunnel) {
 		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+		if (session->session_id != 0)
+			atomic_dec(&l2tp_session_count);
+		sock_put(tunnel->sock);
+		session->tunnel = NULL;
+		l2tp_tunnel_dec_refcount(tunnel);
+	}
+
+	kfree(session);
+
+	return;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_free);
+
+/* Remove an l2tp session from l2tp_core's hash lists.
+ * Provides a tidyup interface for pseudowire code which can't just route all
+ * shutdown via. l2tp_session_delete and a pseudowire-specific session_close
+ * callback.
+ */
+void __l2tp_session_unhash(struct l2tp_session *session)
+{
+	struct l2tp_tunnel *tunnel = session->tunnel;
 
-		/* Delete the session from the hash */
+	/* Remove the session from core hashes */
+	if (tunnel) {
+		/* Remove from the per-tunnel hash */
 		write_lock_bh(&tunnel->hlist_lock);
 		hlist_del_init(&session->hlist);
 		write_unlock_bh(&tunnel->hlist_lock);
 
-		/* Unlink from the global hash if not L2TPv2 */
+		/* For L2TPv3 we have a per-net hash: remove from there, too */
 		if (tunnel->version != L2TP_HDR_VER_2) {
 			struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
 			spin_lock_bh(&pn->l2tp_session_hlist_lock);
 			hlist_del_init_rcu(&session->global_hlist);
 			spin_unlock_bh(&pn->l2tp_session_hlist_lock);
 			synchronize_rcu();
 		}
-
-		if (session->session_id != 0)
-			atomic_dec(&l2tp_session_count);
-
-		sock_put(tunnel->sock);
-
-		/* This will delete the tunnel context if this
-		 * is the last session on the tunnel.
-		 */
-		session->tunnel = NULL;
-		l2tp_tunnel_dec_refcount(tunnel);
 	}
-
-	kfree(session);
-
-	return;
 }
-EXPORT_SYMBOL_GPL(l2tp_session_free);
+EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
 
 /* This function is used by the netlink SESSION_DELETE command and by
    pseudowire modules.
  */
 int l2tp_session_delete(struct l2tp_session *session)
 {
+	if (session->ref)
+		(*session->ref)(session);
+	__l2tp_session_unhash(session);
 	l2tp_session_queue_purge(session);
-
 	if (session->session_close != NULL)
 		(*session->session_close)(session);
-
+	if (session->deref)
+		(*session->ref)(session);
 	l2tp_session_dec_refcount(session);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(l2tp_session_delete);
 
-
 /* We come here whenever a session's send_seq, cookie_len or
  * l2specific_len parameters are set.
  */
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 519b013..485a490 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -242,6 +242,7 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
 extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+extern void __l2tp_session_unhash(struct l2tp_session *session);
 extern int l2tp_session_delete(struct l2tp_session *session);
 extern void l2tp_session_free(struct l2tp_session *session);
 extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 9d0eb8c..637a341 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -466,19 +466,12 @@ static void pppol2tp_session_close(struct l2tp_session *session)
  */
 static void pppol2tp_session_destruct(struct sock *sk)
 {
-	struct l2tp_session *session;
-
-	if (sk->sk_user_data != NULL) {
-		session = sk->sk_user_data;
-		if (session == NULL)
-			goto out;
-
+	struct l2tp_session *session = sk->sk_user_data;
+	if (session) {
 		sk->sk_user_data = NULL;
 		BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 		l2tp_session_dec_refcount(session);
 	}
-
-out:
 	return;
 }
 
@@ -509,6 +502,7 @@ static int pppol2tp_release(struct socket *sock)
 
 	/* Purge any queued data */
 	if (session != NULL) {
+		__l2tp_session_unhash(session);
 		l2tp_session_queue_purge(session);
 		sock_put(sk);
 	}
-- 
cgit v1.1


From 8ed781668dd49b608f1e67a22e3b445fd0c2cd6f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 19 Mar 2013 06:39:29 +0000
Subject: flow_keys: include thoff into flow_keys for later usage

In skb_flow_dissect(), we perform a dissection of a skbuff. Since we're
doing the work here anyway, also store thoff for a later usage, e.g. in
the BPF filter.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 9d4c720..e187bf0 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -140,6 +140,8 @@ ipv6:
 			flow->ports = *ports;
 	}
 
+	flow->thoff = (u16) nhoff;
+
 	return true;
 }
 EXPORT_SYMBOL(skb_flow_dissect);
-- 
cgit v1.1


From 283951f95b067877ca5ea77afaa212bb1e0507b5 Mon Sep 17 00:00:00 2001
From: Martin Fuzzey <mfuzzey@parkeon.com>
Date: Tue, 19 Mar 2013 08:19:29 +0000
Subject: ipconfig: Fix newline handling in log message.

When using ipconfig the logs currently look like:

Single name server:
[    3.467270] IP-Config: Complete:
[    3.470613]      device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1
[    3.480670]      host=infigo-1, domain=, nis-domain=(none)
[    3.486166]      bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath=
[    3.492910]      nameserver0=172.16.42.1[    3.496853] ALSA device list:

Three name servers:
[    3.496949] IP-Config: Complete:
[    3.500293]      device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1
[    3.510367]      host=infigo-1, domain=, nis-domain=(none)
[    3.515864]      bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath=
[    3.522635]      nameserver0=172.16.42.1, nameserver1=172.16.42.100
[    3.529149] , nameserver2=172.16.42.200

Fix newline handling for these cases

Signed-off-by: Martin Fuzzey <mfuzzey@parkeon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 98cbc68..bf6c5cf 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1522,7 +1522,8 @@ static int __init ip_auto_config(void)
 		}
 	for (i++; i < CONF_NAMESERVERS_MAX; i++)
 		if (ic_nameservers[i] != NONE)
-			pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]);
+			pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]);
+	pr_cont("\n");
 #endif /* !SILENT */
 
 	return 0;
-- 
cgit v1.1


From 896ee0eee6261e30c3623be931c3f621428947df Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 20 Mar 2013 05:19:24 +0000
Subject: net/irda: add missing error path release_sock call

This makes sure that release_sock is called for all error conditions in
irda_getsockopt.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Brad Spengler <spender@grsecurity.net>
Cc: stable@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index d07e3a6..d28e7f0 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2583,8 +2583,10 @@ bed:
 				    NULL, NULL, NULL);
 
 		/* Check if the we got some results */
-		if (!self->cachedaddr)
-			return -EAGAIN;		/* Didn't find any devices */
+		if (!self->cachedaddr) {
+			err = -EAGAIN;		/* Didn't find any devices */
+			goto out;
+		}
 		daddr = self->cachedaddr;
 		/* Cleanup */
 		self->cachedaddr = 0;
-- 
cgit v1.1


From f4541d60a449afd40448b06496dcd510f505928e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 21 Mar 2013 17:36:09 +0000
Subject: tcp: preserve ACK clocking in TSO

A long standing problem with TSO is the fact that tcp_tso_should_defer()
rearms the deferred timer, while it should not.

Current code leads to following bad bursty behavior :

20:11:24.484333 IP A > B: . 297161:316921(19760) ack 1 win 119
20:11:24.484337 IP B > A: . ack 263721 win 1117
20:11:24.485086 IP B > A: . ack 265241 win 1117
20:11:24.485925 IP B > A: . ack 266761 win 1117
20:11:24.486759 IP B > A: . ack 268281 win 1117
20:11:24.487594 IP B > A: . ack 269801 win 1117
20:11:24.488430 IP B > A: . ack 271321 win 1117
20:11:24.489267 IP B > A: . ack 272841 win 1117
20:11:24.490104 IP B > A: . ack 274361 win 1117
20:11:24.490939 IP B > A: . ack 275881 win 1117
20:11:24.491775 IP B > A: . ack 277401 win 1117
20:11:24.491784 IP A > B: . 316921:332881(15960) ack 1 win 119
20:11:24.492620 IP B > A: . ack 278921 win 1117
20:11:24.493448 IP B > A: . ack 280441 win 1117
20:11:24.494286 IP B > A: . ack 281961 win 1117
20:11:24.495122 IP B > A: . ack 283481 win 1117
20:11:24.495958 IP B > A: . ack 285001 win 1117
20:11:24.496791 IP B > A: . ack 286521 win 1117
20:11:24.497628 IP B > A: . ack 288041 win 1117
20:11:24.498459 IP B > A: . ack 289561 win 1117
20:11:24.499296 IP B > A: . ack 291081 win 1117
20:11:24.500133 IP B > A: . ack 292601 win 1117
20:11:24.500970 IP B > A: . ack 294121 win 1117
20:11:24.501388 IP B > A: . ack 295641 win 1117
20:11:24.501398 IP A > B: . 332881:351881(19000) ack 1 win 119

While the expected behavior is more like :

20:19:49.259620 IP A > B: . 197601:202161(4560) ack 1 win 119
20:19:49.260446 IP B > A: . ack 154281 win 1212
20:19:49.261282 IP B > A: . ack 155801 win 1212
20:19:49.262125 IP B > A: . ack 157321 win 1212
20:19:49.262136 IP A > B: . 202161:206721(4560) ack 1 win 119
20:19:49.262958 IP B > A: . ack 158841 win 1212
20:19:49.263795 IP B > A: . ack 160361 win 1212
20:19:49.264628 IP B > A: . ack 161881 win 1212
20:19:49.264637 IP A > B: . 206721:211281(4560) ack 1 win 119
20:19:49.265465 IP B > A: . ack 163401 win 1212
20:19:49.265886 IP B > A: . ack 164921 win 1212
20:19:49.266722 IP B > A: . ack 166441 win 1212
20:19:49.266732 IP A > B: . 211281:215841(4560) ack 1 win 119
20:19:49.267559 IP B > A: . ack 167961 win 1212
20:19:49.268394 IP B > A: . ack 169481 win 1212
20:19:49.269232 IP B > A: . ack 171001 win 1212
20:19:49.269241 IP A > B: . 215841:221161(5320) ack 1 win 119

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Van Jacobson <vanj@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 817fbb3..5d0b438 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1809,8 +1809,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 			goto send_now;
 	}
 
-	/* Ok, it looks like it is advisable to defer.  */
-	tp->tso_deferred = 1 | (jiffies << 1);
+	/* Ok, it looks like it is advisable to defer.
+	 * Do not rearm the timer if already set to not break TCP ACK clocking.
+	 */
+	if (!tp->tso_deferred)
+		tp->tso_deferred = 1 | (jiffies << 1);
 
 	return true;
 
-- 
cgit v1.1


From 9979a55a833883242e3a29f3596676edd7199c46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Mar 2013 14:38:28 +0000
Subject: net: remove a WARN_ON() in net_enable_timestamp()

The WARN_ON(in_interrupt()) in net_enable_timestamp() can get false
positive, in socket clone path, run from softirq context :

[ 3641.624425] WARNING: at net/core/dev.c:1532 net_enable_timestamp+0x7b/0x80()
[ 3641.668811] Call Trace:
[ 3641.671254]  <IRQ>  [<ffffffff80286817>] warn_slowpath_common+0x87/0xc0
[ 3641.677871]  [<ffffffff8028686a>] warn_slowpath_null+0x1a/0x20
[ 3641.683683]  [<ffffffff80742f8b>] net_enable_timestamp+0x7b/0x80
[ 3641.689668]  [<ffffffff80732ce5>] sk_clone_lock+0x425/0x450
[ 3641.695222]  [<ffffffff8078db36>] inet_csk_clone_lock+0x16/0x170
[ 3641.701213]  [<ffffffff807ae449>] tcp_create_openreq_child+0x29/0x820
[ 3641.707663]  [<ffffffff807d62e2>] ? ipt_do_table+0x222/0x670
[ 3641.713354]  [<ffffffff807aaf5b>] tcp_v4_syn_recv_sock+0xab/0x3d0
[ 3641.719425]  [<ffffffff807af63a>] tcp_check_req+0x3da/0x530
[ 3641.724979]  [<ffffffff8078b400>] ? inet_hashinfo_init+0x60/0x80
[ 3641.730964]  [<ffffffff807ade6f>] ? tcp_v4_rcv+0x79f/0xbe0
[ 3641.736430]  [<ffffffff807ab9bd>] tcp_v4_do_rcv+0x38d/0x4f0
[ 3641.741985]  [<ffffffff807ae14a>] tcp_v4_rcv+0xa7a/0xbe0

Its safe at this point because the parent socket owns a reference
on the netstamp_needed, so we cant have a 0 -> 1 transition, which
requires to lock a mutex.

Instead of refining the check, lets remove it, as all known callers
are safe. If it ever changes in the future, static_key_slow_inc()
will complain anyway.

Reported-by: Laurent Chavey <chavey@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index d540ced..b13e5c7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1545,7 +1545,6 @@ void net_enable_timestamp(void)
 		return;
 	}
 #endif
-	WARN_ON(in_interrupt());
 	static_key_slow_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
-- 
cgit v1.1


From 4a7df340ed1bac190c124c1601bfc10cde9fb4fb Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Fri, 22 Mar 2013 19:14:07 +0000
Subject: 8021q: fix a potential use-after-free

vlan_vid_del() could possibly free ->vlan_info after a RCU grace
period, however, we may still refer to the freed memory area
by 'grp' pointer. Found by code inspection.

This patch moves vlan_vid_del() as behind as possible.

Cc: Patrick McHardy <kaber@trash.net>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a187144..85addcd 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -86,13 +86,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 
 	grp = &vlan_info->grp;
 
-	/* Take it out of our own structures, but be sure to interlock with
-	 * HW accelerating devices or SW vlan input packet processing if
-	 * VLAN is not 0 (leave it there for 802.1p).
-	 */
-	if (vlan_id)
-		vlan_vid_del(real_dev, vlan_id);
-
 	grp->nr_vlan_devs--;
 
 	if (vlan->flags & VLAN_FLAG_MVRP)
@@ -114,6 +107,13 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 		vlan_gvrp_uninit_applicant(real_dev);
 	}
 
+	/* Take it out of our own structures, but be sure to interlock with
+	 * HW accelerating devices or SW vlan input packet processing if
+	 * VLAN is not 0 (leave it there for 802.1p).
+	 */
+	if (vlan_id)
+		vlan_vid_del(real_dev, vlan_id);
+
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
-- 
cgit v1.1


From 9b46922e15f4d9d2aedcd320c3b7f7f54d956da7 Mon Sep 17 00:00:00 2001
From: Hong zhi guo <honkiko@gmail.com>
Date: Sat, 23 Mar 2013 02:27:50 +0000
Subject: bridge: fix crash when set mac address of br interface

When I tried to set mac address of a bridge interface to a mac
address which already learned on this bridge, I got system hang.

The cause is straight forward: function br_fdb_change_mac_address
calls fdb_insert with NULL source nbp. Then an fdb lookup is
performed. If an fdb entry is found and it's local, it's OK. But
if it's not local, source is dereferenced for printk without NULL
check.

Signed-off-by: Hong Zhiguo <honkiko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b0812c9..bab338e 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -423,7 +423,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 			return 0;
 		br_warn(br, "adding interface %s with same address "
 		       "as a received packet\n",
-		       source->dev->name);
+		       source ? source->dev->name : br->dev->name);
 		fdb_delete(br, fdb);
 	}
 
-- 
cgit v1.1


From 7ebe183c6d444ef5587d803b64a1f4734b18c564 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Sun, 24 Mar 2013 10:42:25 +0000
Subject: tcp: undo spurious timeout after SACK reneging

On SACK reneging the sender immediately retransmits and forces a
timeout but disables Eifel (undo). If the (buggy) receiver does not
drop any packet this can trigger a false slow-start retransmit storm
driven by the ACKs of the original packets. This can be detected with
undo and TCP timestamps.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0d9bdac..3bd55ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2059,11 +2059,8 @@ void tcp_enter_loss(struct sock *sk, int how)
 	if (tcp_is_reno(tp))
 		tcp_reset_reno_sack(tp);
 
-	if (!how) {
-		/* Push undo marker, if it was plain RTO and nothing
-		 * was retransmitted. */
-		tp->undo_marker = tp->snd_una;
-	} else {
+	tp->undo_marker = tp->snd_una;
+	if (how) {
 		tp->sacked_out = 0;
 		tp->fackets_out = 0;
 	}
-- 
cgit v1.1


From 1166fde6a923c30f4351515b6a9a1efc513e7d00 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 25 Mar 2013 11:23:40 -0400
Subject: SUNRPC: Add barriers to ensure read ordering in
 rpc_wake_up_task_queue_locked

We need to be careful when testing task->tk_waitqueue in
rpc_wake_up_task_queue_locked, because it can be changed while we
are holding the queue->lock.
By adding appropriate memory barriers, we can ensure that it is safe to
test task->tk_waitqueue for equality if the RPC_TASK_QUEUED bit is set.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
---
 net/sunrpc/sched.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index fb20f25..f8529fc 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
 		list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
 	task->tk_waitqueue = queue;
 	queue->qlen++;
+	/* barrier matches the read in rpc_wake_up_task_queue_locked() */
+	smp_wmb();
 	rpc_set_queued(task);
 
 	dprintk("RPC: %5u added to queue %p \"%s\"\n",
@@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
  */
 static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
 {
-	if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue)
-		__rpc_do_wake_up_task(queue, task);
+	if (RPC_IS_QUEUED(task)) {
+		smp_rmb();
+		if (task->tk_waitqueue == queue)
+			__rpc_do_wake_up_task(queue, task);
+	}
 }
 
 /*
-- 
cgit v1.1


From ded34e0fe8fe8c2d595bfa30626654e4b87621e0 Mon Sep 17 00:00:00 2001
From: Paul Moore <pmoore@redhat.com>
Date: Mon, 25 Mar 2013 03:18:33 +0000
Subject: unix: fix a race condition in unix_release()

As reported by Jan, and others over the past few years, there is a
race condition caused by unix_release setting the sock->sk pointer
to NULL before properly marking the socket as dead/orphaned.  This
can cause a problem with the LSM hook security_unix_may_send() if
there is another socket attempting to write to this partially
released socket in between when sock->sk is set to NULL and it is
marked as dead/orphaned.  This patch fixes this by only setting
sock->sk to NULL after the socket has been marked as dead; I also
take the opportunity to make unix_release_sock() a void function
as it only ever returned 0/success.

Dave, I think this one should go on the -stable pile.

Special thanks to Jan for coming up with a reproducer for this
problem.

Reported-by: Jan Stancek <jan.stancek@gmail.com>
Signed-off-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 51be64f..f153a8d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk)
 #endif
 }
 
-static int unix_release_sock(struct sock *sk, int embrion)
+static void unix_release_sock(struct sock *sk, int embrion)
 {
 	struct unix_sock *u = unix_sk(sk);
 	struct path path;
@@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion)
 
 	if (unix_tot_inflight)
 		unix_gc();		/* Garbage collect fds */
-
-	return 0;
 }
 
 static void init_peercred(struct sock *sk)
@@ -699,9 +697,10 @@ static int unix_release(struct socket *sock)
 	if (!sk)
 		return 0;
 
+	unix_release_sock(sk, 0);
 	sock->sk = NULL;
 
-	return unix_release_sock(sk, 0);
+	return 0;
 }
 
 static int unix_autobind(struct socket *sock)
-- 
cgit v1.1


From a79ca223e029aa4f09abb337accf1812c900a800 Mon Sep 17 00:00:00 2001
From: Hong Zhiguo <honkiko@gmail.com>
Date: Tue, 26 Mar 2013 01:52:45 +0800
Subject: ipv6: fix bad free of addrconf_init_net

Signed-off-by: Hong Zhiguo <honkiko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f2c7e61..26512250 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4784,26 +4784,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev)
 
 static int __net_init addrconf_init_net(struct net *net)
 {
-	int err;
+	int err = -ENOMEM;
 	struct ipv6_devconf *all, *dflt;
 
-	err = -ENOMEM;
-	all = &ipv6_devconf;
-	dflt = &ipv6_devconf_dflt;
+	all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL);
+	if (all == NULL)
+		goto err_alloc_all;
 
-	if (!net_eq(net, &init_net)) {
-		all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL);
-		if (all == NULL)
-			goto err_alloc_all;
+	dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
+	if (dflt == NULL)
+		goto err_alloc_dflt;
 
-		dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL);
-		if (dflt == NULL)
-			goto err_alloc_dflt;
-	} else {
-		/* these will be inherited by all namespaces */
-		dflt->autoconf = ipv6_defaults.autoconf;
-		dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
-	}
+	/* these will be inherited by all namespaces */
+	dflt->autoconf = ipv6_defaults.autoconf;
+	dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
 
 	net->ipv6.devconf_all = all;
 	net->ipv6.devconf_dflt = dflt;
-- 
cgit v1.1


From 14134f6584212d585b310ce95428014b653dfaf6 Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Mon, 25 Mar 2013 17:02:04 +0000
Subject: af_unix: dont send SCM_CREDENTIAL when dest socket is NULL

SCM_SCREDENTIALS should apply to write() syscalls only either source or destination
socket asserted SOCK_PASSCRED. The original implememtation in maybe_add_creds is wrong,
and breaks several LSB testcases ( i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom).

Origionally-authored-by: Karel Srot <ksrot@redhat.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f153a8d..971282b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
 	if (UNIXCB(skb).cred)
 		return;
 	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
-	    !other->sk_socket ||
-	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+	    (other->sk_socket &&
+	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) {
 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
 		UNIXCB(skb).cred = get_current_cred();
 	}
-- 
cgit v1.1


From 330305cc4a6b0cb75c22fc01b8826f0ad755550f Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Sun, 24 Mar 2013 17:36:29 +0000
Subject: ipv4: Fix ip-header identification for gso packets.

ip-header id needs to be incremented even if IP_DF flag is set.
This behaviour was changed in commit 490ab08127cebc25e3a26
(IP_GRE: Fix IP-Identification).

Following patch fixes it so that identification is always
incremented.

Reported-by: Cong Wang <amwang@redhat.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 68f6a94..c929d9c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1333,8 +1333,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				iph->frag_off |= htons(IP_MF);
 			offset += (skb->len - skb->mac_len - iph->ihl * 4);
 		} else  {
-			if (!(iph->frag_off & htons(IP_DF)))
-				iph->id = htons(id++);
+			iph->id = htons(id++);
 		}
 		iph->tot_len = htons(skb->len - skb->mac_len);
 		iph->check = 0;
-- 
cgit v1.1