From 0f97ede45e65ffb6eab856313e79b14b902bcfaa Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 2 Apr 2014 20:52:56 +0200
Subject: packet: report tx_dropped in packet_direct_xmit

Since commit 015f0688f57c ("net: net: add a core netdev->tx_dropped
counter"), we can now account for TX drops from within the core stack
instead of drivers. Therefore, fix packet_direct_xmit() and increase the
drop count when we encounter a problem before the driver's xmit function
is called (we do not want to doubly account for it).

Suggested-by: Eric Dumazet
Signed-off-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 net/packet/af_packet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 01039d2..c81a971 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -275,6 +275,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
 	return ret;
 
 drop:
+	atomic_long_inc(&dev->tx_dropped);
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
 }
--
cgit v1.1

From 8e2f1a63f2217365223026422a2f8ba5967051d6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Wed, 2 Apr 2014 20:52:57 +0200
Subject: packet: fix packet_direct_xmit for BQL enabled drivers

Currently, in packet_direct_xmit() we test the assigned netdevice queue
for netif_xmit_frozen_or_stopped() before doing an ndo_start_xmit().

This can have the side effect that, for BQL enabled drivers which make
use of netdev_tx_sent_queue() internally, __QUEUE_STATE_STACK_XOFF can
be set from within the stack, and we would then not fully fill the
device's TX ring from packet sockets with PACKET_QDISC_BYPASS enabled.

Instead, use a test without the BQL bit so that bursts can be absorbed
into the NIC's TX ring. Fix and code suggested by Eric Dumazet, thanks!

Signed-off-by: Eric Dumazet
Signed-off-by: Daniel Borkmann
Signed-off-by: David S. Miller
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c81a971..72e0c71 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -261,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
 
 	local_bh_disable();
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_xmit_frozen_or_stopped(txq)) {
+	if (!netif_xmit_frozen_or_drv_stopped(txq)) {
 		ret = ops->ndo_start_xmit(skb, dev);
 		if (ret == NETDEV_TX_OK)
 			txq_trans_update(txq);
--
cgit v1.1
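
Taken together, the two packet patches above leave packet_direct_xmit()
looking roughly like the sketch below. Only the identifiers visible in
the hunks are from the source; the pre-check and the queue selection are
assumed glue added purely for illustration:

    static int packet_direct_xmit(struct sk_buff *skb)
    {
            struct net_device *dev = skb->dev;
            const struct net_device_ops *ops = dev->netdev_ops;
            struct netdev_queue *txq;
            int ret = NETDEV_TX_BUSY;

            /* assumed pre-check, for illustration only */
            if (unlikely(skb->len > dev->mtu + dev->hard_header_len))
                    goto drop;

            txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

            local_bh_disable();
            HARD_TX_LOCK(dev, txq, smp_processor_id());
            /* Test only the driver/frozen bits, not __QUEUE_STATE_STACK_XOFF,
             * so BQL drivers keep absorbing bursts into the TX ring. */
            if (!netif_xmit_frozen_or_drv_stopped(txq)) {
                    ret = ops->ndo_start_xmit(skb, dev);
                    if (ret == NETDEV_TX_OK)
                            txq_trans_update(txq);
            }
            HARD_TX_UNLOCK(dev, txq);
            local_bh_enable();
            return ret;

    drop:
            /* Failed before the driver ever saw the skb, so count it in
             * the core tx_dropped counter added by 015f0688f57c. */
            atomic_long_inc(&dev->tx_dropped);
            kfree_skb(skb);
            return NET_XMIT_DROP;
    }
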
From d0290214de712150b118a532ded378a29255893b Mon Sep 17 00:00:00 2001
From: Jiri Pirko
Date: Wed, 2 Apr 2014 23:09:31 +0200
Subject: net: add busy_poll device feature

Currently there is no way to find out whether a device supports busy
polling, so add a feature and make it dependent on the existence of
ndo_busy_poll.

Signed-off-by: Jiri Pirko
Signed-off-by: David S. Miller
---
 net/core/dev.c     | 7 +++++++
 net/core/ethtool.c | 1 +
 2 files changed, 8 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 7570634..75e88e0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5696,6 +5696,13 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		}
 	}
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	if (dev->netdev_ops->ndo_busy_poll)
+		features |= NETIF_F_BUSY_POLL;
+	else
+#endif
+		features &= ~NETIF_F_BUSY_POLL;
+
 	return features;
 }
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 30071de..640ba0e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -97,6 +97,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_RXFCS_BIT] = "rx-fcs",
 	[NETIF_F_RXALL_BIT] = "rx-all",
 	[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
+	[NETIF_F_BUSY_POLL_BIT] = "busy-poll",
 };
 
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
--
cgit v1.1

From a5e7ac5ce134d8f72f59631011fafa7bbf7ca174 Mon Sep 17 00:00:00 2001
From: Erik Hugne
Date: Thu, 3 Apr 2014 08:28:01 +0200
Subject: tipc: fix regression bug where node events are not being generated

Commit 5902385a2440a55f005b266c93e0bb9398e5a62b ("tipc: obsolete the
remote management feature") introduces a regression where node topology
events are not being generated, because the publication that triggers
this, {0, <z.c.n>, <z.c.n>}, is no longer available. This will break
applications that rely on node events to discover when nodes join or
leave a cluster.

We fix this by advertising the node publication when TIPC enters
networking mode, and withdrawing it upon shutdown.

Signed-off-by: Erik Hugne
Reviewed-by: Jon Maloy
Reviewed-by: Ying Xue
Signed-off-by: David S. Miller
---
 net/tipc/net.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/tipc/net.c b/net/tipc/net.c
index 0374a81..4c564eb 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -182,6 +182,8 @@ void tipc_net_start(u32 addr)
 	tipc_bclink_init();
 	write_unlock_bh(&tipc_net_lock);
 
+	tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr,
+			     TIPC_ZONE_SCOPE, 0, tipc_own_addr);
 	pr_info("Started in network mode\n");
 	pr_info("Own node address %s, network identity %u\n",
 		tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
@@ -192,6 +194,7 @@ void tipc_net_stop(void)
 	if (!tipc_own_addr)
 		return;
 
+	tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr);
 	write_lock_bh(&tipc_net_lock);
 	tipc_bearer_stop();
 	tipc_bclink_stop();
--
cgit v1.1
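
A note on the busy_poll feature patch above: a driver never sets
NETIF_F_BUSY_POLL itself, because netdev_fix_features() derives the bit
from the presence of the hook. A minimal, hypothetical driver sketch
(all foo_* names are made up for illustration; the hook's return
convention is per the busy-poll infrastructure of this kernel era):

    #ifdef CONFIG_NET_RX_BUSY_POLL
    /* Hypothetical low-latency hook: poll the RX ring directly from
     * syscall context, bypassing the softirq path; returns the number
     * of packets processed. */
    static int foo_busy_poll(struct napi_struct *napi)
    {
            int work = 0;
            /* ... clean the RX ring, counting packets into 'work' ... */
            return work;
    }
    #endif

    static const struct net_device_ops foo_netdev_ops = {
            /* .ndo_open, .ndo_start_xmit, ... (elided) */
    #ifdef CONFIG_NET_RX_BUSY_POLL
            .ndo_busy_poll = foo_busy_poll, /* this alone enables "busy-poll" */
    #endif
    };

After register_netdev(), "ethtool -k <iface>" should then list the new
"busy-poll" string added in the ethtool hunk.
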
From e33d0ba8047b049c9262fdb1fcafb93cb52ceceb Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 3 Apr 2014 09:28:10 -0700
Subject: net-gro: reset skb->truesize in napi_reuse_skb()

Recycling skbs has always been very tricky...

This time, it appears the GRO layer can accumulate skb->truesize
adjustments made by drivers when they attach a fragment to an skb;
skb_gro_receive() can only subtract the used part of a fragment from
skb->truesize.

I spotted this problem by seeing unexpected TcpExtPruneCalled and
TcpExtTCPRcvCollapsed counters on a recent kernel, where the TCP receive
window should be sized properly to accept traffic coming from a driver
that does not overshoot skb->truesize.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 75e88e0..5777018 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4043,6 +4043,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb->vlan_tci = 0;
 	skb->dev = napi->dev;
 	skb->skb_iif = 0;
+	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
 
 	napi->skb = skb;
 }
--
cgit v1.1

From e5ac6eafba887821044c65b6fe59d9eb8b7c7f61 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Mon, 17 Mar 2014 22:27:50 +0100
Subject: netfilter: connlimit: fix UP build

We cannot use ARRAY_SIZE() if spinlock_t is an empty struct.

Fixes: 1442e7507dd597 ("netfilter: connlimit: use keyed locks")
Reported-by: kbuild test robot
Signed-off-by: Florian Westphal
Signed-off-by: Pablo Neira Ayuso
---
 net/netfilter/xt_connlimit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 458464e..a6e129e 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -377,7 +377,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i)
+	for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
 		spin_lock_init(&info->data->locks[i]);
 
 	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
--
cgit v1.1
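
Regarding the napi_reuse_skb() patch above: SKB_TRUESIZE() recomputes a
baseline truesize from the linear buffer size alone. In
include/linux/skbuff.h of this era it is defined roughly as follows
(quoted from memory, not from the patch itself):

    #define SKB_TRUESIZE(X) ((X) +                                      \
                     SKB_DATA_ALIGN(sizeof(struct sk_buff)) +           \
                     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

So the reset discards whatever fragment-driven inflation the driver had
added before the skb is recycled, restoring the invariant that a reused
napi skb accounts only for its own linear area.
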
From e00b437b3d6d4d26ecd95108b575ee1bcfcb478f Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Thu, 20 Mar 2014 11:53:39 +0100
Subject: netfilter: connlimit: move lock array out of struct connlimit_data

Eric points out that the locks can be global.

Moreover, both Jesper and Eric note that using only 32 locks increases
false sharing, as only two cache lines are used.

This increases the number of locks to 256 (16 cache lines, assuming a
64-byte cacheline and 4 bytes per spinlock).

Suggested-by: Jesper Dangaard Brouer
Suggested-by: Eric Dumazet
Signed-off-by: Florian Westphal
Signed-off-by: Pablo Neira Ayuso
---
 net/netfilter/xt_connlimit.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index a6e129e..fbc66bb 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -32,8 +32,14 @@
 #include
 #include
 
-#define CONNLIMIT_SLOTS		32
-#define CONNLIMIT_LOCK_SLOTS	32
+#define CONNLIMIT_SLOTS		256U
+
+#ifdef CONFIG_LOCKDEP
+#define CONNLIMIT_LOCK_SLOTS	8U
+#else
+#define CONNLIMIT_LOCK_SLOTS	256U
+#endif
+
 #define CONNLIMIT_GC_MAX_NODES	8
 
 /* we will save the tuples of all connections we care about */
@@ -49,10 +55,11 @@ struct xt_connlimit_rb {
 	union nf_inet_addr addr; /* search key */
 };
 
+static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
+
 struct xt_connlimit_data {
 	struct rb_root climit_root4[CONNLIMIT_SLOTS];
 	struct rb_root climit_root6[CONNLIMIT_SLOTS];
-	spinlock_t locks[CONNLIMIT_LOCK_SLOTS];
 };
 
 static u_int32_t connlimit_rnd __read_mostly;
@@ -297,11 +304,11 @@ static int count_them(struct net *net,
 		root = &data->climit_root4[hash];
 	}
 
-	spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
+	spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
 	count = count_tree(net, root, tuple, addr, mask, family);
-	spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
+	spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
 
 	return count;
 }
@@ -377,9 +384,6 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
-		spin_lock_init(&info->data->locks[i]);
-
 	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
 		info->data->climit_root4[i] = RB_ROOT;
 	for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
@@ -435,11 +439,14 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
 
 static int __init connlimit_mt_init(void)
 {
-	int ret;
+	int ret, i;
 
 	BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
 	BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
 
+	for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
+		spin_lock_init(&xt_connlimit_locks[i]);
+
 	connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
 					   sizeof(struct xt_connlimit_conn),
 					   0, 0, NULL);
--
cgit v1.1
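
To make the cache-line arithmetic in the changelog concrete: 32
spinlocks * 4 bytes = 128 bytes, i.e. only 2 cache lines of 64 bytes
shared by all CPUs, while 256 * 4 = 1024 bytes spreads the slots over
16 lines. The keyed-lock pattern itself, reduced to a sketch (names
shortened, not the literal source):

    /* One global, cacheline-aligned lock array shared by all rule
     * instances; a tuple hash picks the slot. */
    static spinlock_t locks[LOCK_SLOTS] __cacheline_aligned_in_smp;

    static void count_locked(u32 hash)
    {
            /* LOCK_SLOTS must divide the number of hash slots (see the
             * BUILD_BUG_ONs in the patch), so the modulo maps tree
             * slots onto locks uniformly. */
            spin_lock_bh(&locks[hash % LOCK_SLOTS]);
            /* ... walk/prune the rbtree for this hash slot ... */
            spin_unlock_bh(&locks[hash % LOCK_SLOTS]);
    }
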
From a00e76349f3564bb8129fc0510dfd93248c3084d Mon Sep 17 00:00:00 2001
From: Alexey Perevalov
Date: Wed, 19 Mar 2014 10:58:42 +0400
Subject: netfilter: x_tables: allow to use cgroup match for LOCAL_IN nf hooks

This simple modification allows iptables to work with the INPUT chain
in combination with the cgroup module. It could be useful for counting
ingress traffic per cgroup with the nfacct netfilter module. Formerly
there was no problem counting egress traffic that way.

It's possible to get a classified sk_buff after PREROUTING because the
socket lookup is done in early_demux (tcp_v4_early_demux). It works for
UDP as well.

Trivial usage example, assuming we're in the same shell every step and
we have enough permissions:

1) Classic net_cls cgroup initialization:

	mkdir /sys/fs/cgroup/net_cls
	mount -t cgroup -o net_cls net_cls /sys/fs/cgroup/net_cls

2) Set up a cgroup for the interesting application:

	mkdir /sys/fs/cgroup/net_cls/wget
	echo 1 > /sys/fs/cgroup/net_cls/wget/net_cls.classid
	echo $BASHPID > /sys/fs/cgroup/net_cls/wget/cgroup.procs

3) Create kernel counters:

	nfacct add wget-cgroup-in
	iptables -A INPUT -m cgroup ! --cgroup 1 -m nfacct --nfacct-name wget-cgroup-in

	nfacct add wget-cgroup-out
	iptables -A OUTPUT -m cgroup ! --cgroup 1 -m nfacct --nfacct-name wget-cgroup-out

4) Network usage:

	wget https://www.kernel.org/pub/linux/kernel/v3.x/testing/linux-3.14-rc6.tar.xz

5) Check the results:

	nfacct list

The cgroup approach is being used for the DataUsage (counting & blocking
traffic) feature of Samsung's modification of the Tizen OS.

Signed-off-by: Alexey Perevalov
Acked-by: Daniel Borkmann
Signed-off-by: Pablo Neira Ayuso
---
 net/netfilter/xt_cgroup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 9a8e77e7..f4e8330 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -54,7 +54,8 @@ static struct xt_match cgroup_mt_reg __read_mostly = {
 	.matchsize  = sizeof(struct xt_cgroup_info),
 	.me         = THIS_MODULE,
 	.hooks      = (1 << NF_INET_LOCAL_OUT) |
-		      (1 << NF_INET_POST_ROUTING),
+		      (1 << NF_INET_POST_ROUTING) |
+		      (1 << NF_INET_LOCAL_IN),
 };
 
 static int __init cgroup_mt_init(void)
--
cgit v1.1

From b8ddd9eac8788b0aa9a9d4e09d76dc9e1667bb2c Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Wed, 26 Mar 2014 14:37:59 +0400
Subject: netfilter: Add {ipt,ip6t}_osf aliases for xt_osf

These aliases do not exist, so the kernel cannot request the appropriate
match table:

	$ iptables -I INPUT -p tcp -m osf --genre Windows --ttl 2 -j DROP
	iptables: No chain/target/match by that name.

setsockopt() requests the ipt_osf module, which is not present. Add the
aliases.

Signed-off-by: Kirill Tkhai
Signed-off-by: Pablo Neira Ayuso
---
 net/netfilter/xt_osf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 7174611..c529161 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -422,4 +422,6 @@ module_exit(xt_osf_fini);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Evgeniy Polyakov ");
 MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+MODULE_ALIAS("ipt_osf");
+MODULE_ALIAS("ip6t_osf");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
--
cgit v1.1

From a9bdd8365684810e3de804f8c51e52c26a5eccbb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso
Date: Mon, 24 Mar 2014 15:10:37 +0100
Subject: netfilter: nf_tables: set names cannot be larger than 15 bytes

Currently, nf_tables trims off the set name if it exceeds 15 bytes, so
explicitly reject set names that are too large.

Reported-by: Giuseppe Longo
Signed-off-by: Pablo Neira Ayuso
---
 net/netfilter/nf_tables_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 33045a5..43ae487 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1946,7 +1946,8 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
 
 static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_TABLE]	= { .type = NLA_STRING },
-	[NFTA_SET_NAME]		= { .type = NLA_STRING },
+	[NFTA_SET_NAME]		= { .type = NLA_STRING,
+				    .len = IFNAMSIZ - 1 },
 	[NFTA_SET_FLAGS]	= { .type = NLA_U32 },
 	[NFTA_SET_KEY_TYPE]	= { .type = NLA_U32 },
 	[NFTA_SET_KEY_LEN]	= { .type = NLA_U32 },
--
cgit v1.1
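
Why the aliases in the xt_osf patch above are enough: when iptables asks
for a match by name, the xtables core falls back to loading a module
named after the protocol-family prefix. Sketched from memory (the exact
format string lives in net/netfilter/x_tables.c of this period and is an
assumption here):

    /* in the xtables match lookup, roughly: */
    request_module("%st_%s", "ip", "osf");    /* IPv4 -> "ipt_osf"  */
    request_module("%st_%s", "ip6", "osf");   /* IPv6 -> "ip6t_osf" */
    /* MODULE_ALIAS("ipt_osf") / MODULE_ALIAS("ip6t_osf") let modprobe
     * map those request names onto xt_osf.ko. */

Similarly, in the set-name patch, giving the NLA_STRING policy a .len of
IFNAMSIZ - 1 makes netlink attribute parsing reject over-long names up
front instead of letting the 15-byte buffer silently truncate them.
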
From 2fec6bb6f484b1a88b4a325724234d6cfd08c918 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso
Date: Mon, 31 Mar 2014 12:26:39 +0200
Subject: netfilter: nf_tables: fix wrong format in request_module()

The intended format in request_module() is %.*s instead of %*.s.

Reported-by: Florian Westphal
Signed-off-by: Pablo Neira Ayuso
---
 net/netfilter/nf_tables_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 43ae487..3fd159d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -152,8 +152,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
 #ifdef CONFIG_MODULES
 	if (autoload) {
 		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-		request_module("nft-chain-%u-%*.s", afi->family,
-			       nla_len(nla)-1, (const char *)nla_data(nla));
+		request_module("nft-chain-%u-%.*s", afi->family,
+			       nla_len(nla), (const char *)nla_data(nla));
 		nfnl_lock(NFNL_SUBSYS_NFTABLES);
 		type = __nf_tables_chain_type_lookup(afi->family, nla);
 		if (type != NULL)
--
cgit v1.1

From c58dd2dd443c26d856a168db108a0cd11c285bf3 Mon Sep 17 00:00:00 2001
From: Thomas Graf
Date: Fri, 4 Apr 2014 17:57:45 +0200
Subject: netfilter: Can't fail and free after table replacement

All xtables variants suffer from the defect that the copy_to_user()
that copies the counters to user memory may fail after the table has
already been exchanged and thus exposed. Returning an error at this
point would result in freeing the already exposed table. Any subsequent
packet processing would then result in a kernel panic.

We can't copy the counters before exposing the new table, as we want to
provide the counter state after the old table has been unhooked.
Therefore, convert this into a silent error.

Cc: Florian Westphal
Signed-off-by: Thomas Graf
Signed-off-by: Pablo Neira Ayuso
---
 net/bridge/netfilter/ebtables.c | 5 ++---
 net/ipv4/netfilter/arp_tables.c | 6 ++++--
 net/ipv4/netfilter/ip_tables.c  | 6 ++++--
 net/ipv6/netfilter/ip6_tables.c | 6 ++++--
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 0e474b1..1059ed3 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1044,10 +1044,9 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 	if (repl->num_counters &&
 	    copy_to_user(repl->counters, counterstmp,
 			 repl->num_counters * sizeof(struct ebt_counter))) {
-		ret = -EFAULT;
+		/* Silent error, can't fail, new table is already in place */
+		net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n");
 	}
-	else
-		ret = 0;
 
 	/* decrease module count and free resources */
 	EBT_ENTRY_ITERATE(table->entries, table->entries_size,
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 59da7cd..f95b6f9 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1044,8 +1044,10 @@ static int __do_replace(struct net *net, const char *name,
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
-			 sizeof(struct xt_counters) * num_counters) != 0)
-		ret = -EFAULT;
+			 sizeof(struct xt_counters) * num_counters) != 0) {
+		/* Silent error, can't fail, new table is already in place */
+		net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+	}
 	vfree(counters);
 	xt_table_unlock(t);
 	return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 718dfbd..99e810f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1231,8 +1231,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
-			 sizeof(struct xt_counters) * num_counters) != 0)
-		ret = -EFAULT;
+			 sizeof(struct xt_counters) * num_counters) != 0) {
+		/* Silent error, can't fail, new table is already in place */
+		net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+	}
 	vfree(counters);
 	xt_table_unlock(t);
 	return ret;
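diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 710238f..e080fbb 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1241,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	xt_free_table_info(oldinfo);
 	if (copy_to_user(counters_ptr, counters,
-			 sizeof(struct xt_counters) * num_counters) != 0)
-		ret = -EFAULT;
+			 sizeof(struct xt_counters) * num_counters) != 0) {
+		/* Silent error, can't fail, new table is already in place */
+		net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
+	}
 	vfree(counters);
 	xt_table_unlock(t);
 	return ret;
--
cgit v1.1

On the request_module() format fix above: the difference between the
two format strings is easy to miss, and a userspace demo (hypothetical,
not from the patch) makes it visible:

    #include <stdio.h>

    int main(void)
    {
            /* "%.*s": the '*' is the precision - print at most 4 bytes */
            printf("[%.*s]\n", 4, "filter");   /* prints "[filt]" */
            /* "%*.s": the '*' is the field width; the precision after
             * '.' is empty, i.e. zero - print 0 bytes, padded to 4 */
            printf("[%*.s]\n", 4, "filter");   /* prints "[    ]" */
            return 0;
    }

So the old format string never passed the chain type name to modprobe
at all.
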
From 5f9fde5f799df7156eeb3fa58282e9fd2f38a5f8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Sat, 5 Apr 2014 01:04:03 +0200
Subject: net: filter: be more defensive on div/mod by X==0

The old interpreter behaviour was that we returned 0 whenever we found
that a division by 0 would take place. In the new interpreter we would
currently just skip that instruction and continue execution.

It's true that a return value of 0 might not be appropriate in all
cases, but the current users (socket filters -> drop packet, seccomp ->
SECCOMP_RET_KILL, cls_bpf -> unclassified, etc) seem fine with that
behaviour. Better this than undefined BPF program behaviour, as it's
expected that A contains the result of the division. In the future, as
more use cases open up, we could further adapt this return value to our
needs, if necessary.

So reintroduce the return of 0 for division by 0, as in the old
interpreter. Also, in the case of K, which is guaranteed to be 32 bits
wide, sk_chk_filter() already takes care of preventing division by 0
invoked through K, so we can generally spare ourselves these tests.

Signed-off-by: Daniel Borkmann
Reviewed-by: Alexei Starovoitov
Signed-off-by: David S. Miller
---
 net/core/filter.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 765556b..e08b382 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -295,43 +295,43 @@ select_insn:
 		(*(s64 *) &A) >>= K;
 		CONT;
 	BPF_ALU64_BPF_MOD_BPF_X:
+		if (unlikely(X == 0))
+			return 0;
 		tmp = A;
-		if (X)
-			A = do_div(tmp, X);
+		A = do_div(tmp, X);
 		CONT;
 	BPF_ALU_BPF_MOD_BPF_X:
+		if (unlikely(X == 0))
+			return 0;
 		tmp = (u32) A;
-		if (X)
-			A = do_div(tmp, (u32) X);
+		A = do_div(tmp, (u32) X);
 		CONT;
 	BPF_ALU64_BPF_MOD_BPF_K:
 		tmp = A;
-		if (K)
-			A = do_div(tmp, K);
+		A = do_div(tmp, K);
 		CONT;
 	BPF_ALU_BPF_MOD_BPF_K:
 		tmp = (u32) A;
-		if (K)
-			A = do_div(tmp, (u32) K);
+		A = do_div(tmp, (u32) K);
 		CONT;
 	BPF_ALU64_BPF_DIV_BPF_X:
-		if (X)
-			do_div(A, X);
+		if (unlikely(X == 0))
+			return 0;
+		do_div(A, X);
 		CONT;
 	BPF_ALU_BPF_DIV_BPF_X:
+		if (unlikely(X == 0))
+			return 0;
 		tmp = (u32) A;
-		if (X)
-			do_div(tmp, (u32) X);
+		do_div(tmp, (u32) X);
 		A = (u32) tmp;
 		CONT;
 	BPF_ALU64_BPF_DIV_BPF_K:
-		if (K)
-			do_div(A, K);
+		do_div(A, K);
 		CONT;
 	BPF_ALU_BPF_DIV_BPF_K:
 		tmp = (u32) A;
-		if (K)
-			do_div(tmp, (u32) K);
+		do_div(tmp, (u32) K);
 		A = (u32) tmp;
 		CONT;
 	BPF_ALU_BPF_END_BPF_TO_BE:
--
cgit v1.1
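
To see the restored behaviour from the filter author's side, here is a
hypothetical classic BPF program whose X divisor is zero; with this
patch, the interpreter returns 0 at the division (drop, for a socket
filter) instead of continuing with an undefined A:

    #include <linux/filter.h>

    struct sock_filter prog[] = {
            BPF_STMT(BPF_LD  | BPF_W | BPF_LEN, 0), /* A = skb->len  */
            BPF_STMT(BPF_LDX | BPF_W | BPF_IMM, 0), /* X = 0         */
            BPF_STMT(BPF_ALU | BPF_DIV | BPF_X, 0), /* A /= X: the
                                                     * interpreter now
                                                     * returns 0 here */
            BPF_STMT(BPF_RET | BPF_A, 0),           /* not reached   */
    };

The K variants need no runtime test because, as the changelog notes,
sk_chk_filter() rejects a constant zero divisor at load time.
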
From 6c6a9855560d3cfa93120f2ab07b8ea0110f953d Mon Sep 17 00:00:00 2001
From: Jean Sacren
Date: Sat, 5 Apr 2014 00:29:01 -0600
Subject: mac802154: fix duplicate #include headers

The commit e6278d92005e ("mac802154: use header operations to
create/parse headers") included the header net/ieee802154_netdev.h,
which had already been included by the commit b70ab2e87f17
("ieee802154: enforce consistent endianness in the 802.15.4 stack").
Fix this duplicate #include by deleting the latter one, as the required
header is already in place.

Signed-off-by: Jean Sacren
Cc: Alexander Smirnov
Cc: Dmitry Eremin-Solenikov
Cc: Phoebe Buckheister
Cc: linux-zigbee-devel@lists.sourceforge.net
Signed-off-by: David S. Miller
---
 net/mac802154/mib.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 153bd1d..f0991f2 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -26,7 +26,6 @@
 #include
 #include
 #include
-#include <net/ieee802154_netdev.h>
 #include
 
 #include "mac802154.h"
--
cgit v1.1

From 065d7e39563b092dbb429373bd8f0f2295768cea Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven
Date: Sun, 6 Apr 2014 15:56:14 +0200
Subject: tipc: Let tipc_release() return 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/tipc/socket.c: In function ‘tipc_release’:
net/tipc/socket.c:352: warning: ‘res’ is used uninitialized in this function

Introduced by commit 24be34b5a0c9114541891d29dff1152bb1a8df34 ("tipc:
eliminate upcall function pointers between port and socket"), which
removed the sole initializer of "res". Just return 0 to fix it.

Signed-off-by: Geert Uytterhoeven
Signed-off-by: David S. Miller
---
 net/tipc/socket.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 29b7f26..adc12e2 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -301,7 +301,6 @@ static int tipc_release(struct socket *sock)
 	struct tipc_sock *tsk;
 	struct tipc_port *port;
 	struct sk_buff *buf;
-	int res;
 
 	/*
 	 * Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -349,7 +348,7 @@ static int tipc_release(struct socket *sock)
 	sock_put(sk);
 	sock->sk = NULL;
 
-	return res;
+	return 0;
 }
 
 /**
--
cgit v1.1
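
The tipc warning above is the usual shape of this bug class; a minimal
reproduction (hypothetical function, not from the tree):

    static int release_thing(void)
    {
            int res;        /* no longer assigned anywhere */

            /* ... teardown code that used to set 'res' through the
             * removed upcall path ... */
            return res;     /* gcc: 'res' is used uninitialized */
    }

Since every remaining path can only succeed, returning the constant 0
both silences the warning and documents the function's contract.
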
From 6f25cd47dcd2b9912c6e52aa833ba1614f7b5086 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Mon, 7 Apr 2014 17:18:30 +0200
Subject: pktgen: fix xmit test for BQL enabled devices

As in commit 8e2f1a63f221 ("packet: fix packet_direct_xmit for BQL
enabled drivers"), we test for the __QUEUE_STATE_STACK_XOFF bit in
pktgen's xmit, which would keep the device's TX ring from being fully
filled for BQL drivers that use netdev_tx_sent_queue(). The fix is to
use, as we do in packet sockets, the netif_xmit_frozen_or_drv_stopped()
test.

Signed-off-by: Daniel Borkmann
Cc: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/core/pktgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index d0dac57..d068ec2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3340,7 +3340,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	__netif_tx_lock_bh(txq);
 
-	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
+	if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) {
 		ret = NETDEV_TX_BUSY;
 		pkt_dev->last_ok = 0;
 		goto unlock;
--
cgit v1.1

From 6859e7df6d9045a461412777e63bd8cef12f9705 Mon Sep 17 00:00:00 2001
From: Veaceslav Falico
Date: Mon, 7 Apr 2014 11:25:12 +0200
Subject: netdev: remove potentially harmful checks

Currently we're checking a variable for != NULL after actually
dereferencing it, in netdev_lower_get_next_private*(). It's
counter-intuitive at best, and can lead to faulty usage (as it implies
that the variable can be NULL), so fix it by removing the useless
checks.

Reported-by: Daniel Borkmann
CC: "David S. Miller"
CC: Eric Dumazet
CC: Nicolas Dichtel
CC: Jiri Pirko
CC: stephen hemminger
CC: Jerry Chu
Signed-off-by: Veaceslav Falico
Signed-off-by: David S. Miller
---
 net/core/dev.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 5777018..14dac06 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4589,8 +4589,7 @@ void *netdev_lower_get_next_private(struct net_device *dev,
 	if (&lower->list == &dev->adj_list.lower)
 		return NULL;
 
-	if (iter)
-		*iter = lower->list.next;
+	*iter = lower->list.next;
 
 	return lower->private;
 }
@@ -4618,8 +4617,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	if (&lower->list == &dev->adj_list.lower)
 		return NULL;
 
-	if (iter)
-		*iter = &lower->list;
+	*iter = &lower->list;
 
 	return lower->private;
 }
--
cgit v1.1
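
The removed checks were misleading because the intended callers of
netdev_lower_get_next_private*() are the iteration helpers, which own
the cursor. A caller-side sketch (assuming the
netdev_for_each_lower_private() macro of this kernel version):

    struct list_head *iter;
    void *priv;

    /* 'iter' is seeded by the macro itself before the first call and
     * is therefore never NULL inside the callee; checking it *after*
     * it had already been dereferenced could never have helped. */
    netdev_for_each_lower_private(dev, priv, iter) {
            /* use priv, e.g. per-slave state hanging off 'dev' */
    }
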
From 52c35befb69b005c3fc5afdaae3a5717ad013411 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Tue, 8 Apr 2014 17:26:13 +0200
Subject: net: sctp: wake up all assocs if sndbuf policy is per socket

SCTP charges chunks for wmem accounting via skb->truesize in
sctp_set_owner_w(), and sctp_wfree() respectively as the reverse
operation. If a sender runs out of wmem, it needs to wait via
sctp_wait_for_sndbuf(), and gets woken up by a call to
__sctp_write_space(), mostly via sctp_wfree().

__sctp_write_space() is being called per association. Although we
assign sk->sk_write_space() to sctp_write_space(), which is then being
done per socket, it is only used if send space is increased per socket
option (SO_SNDBUF), as SOCK_USE_WRITE_QUEUE is set and it is therefore
not invoked in sock_wfree().

Commit 4c3a5bdae293 ("sctp: Don't charge for data in sndbuf again when
transmitting packet") fixed an issue where, in case
sctp_packet_transmit() manages to queue up more than sndbuf bytes,
sctp_wait_for_sndbuf() will never be woken up again unless it is
interrupted by a signal.

However, a still remaining issue is that if net.sctp.sndbuf_policy=0,
that is, accounting per socket, and one-to-many sockets are in use, the
reclaimed write space from sctp_wfree() is 'unfairly' handed back on
the server to the association that is the lucky one to be woken up
again via __sctp_write_space(), while the remaining associations are
never woken up again (unless by a signal).

The effect disappears with net.sctp.sndbuf_policy=1, that is, wmem
accounting per association, as it guarantees a fair share of wmem among
associations.

Therefore, if we have reclaimed memory in the case of per socket
accounting, wake all related associations of a socket in a fair manner;
that is, traverse the socket's association list starting from the
current neighbour of the association, and issue a __sctp_write_space()
to everyone until we end up waking ourselves. This guarantees that no
association is preferred over another, and that even if more
associations are taken into the one-to-many session, all receivers will
get messages from the server and are not stalled forever on high load.
This still leaves the advantage of per socket accounting intact, as an
association can still use up global limits if unused by others.

Fixes: 4eb701dfc618 ("[SCTP] Fix SCTP sendbuffer accouting.")
Signed-off-by: Daniel Borkmann
Cc: Thomas Graf
Cc: Neil Horman
Cc: Vlad Yasevich
Acked-by: Vlad Yasevich
Acked-by: Neil Horman
Signed-off-by: David S. Miller
---
 net/sctp/socket.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 981aaf8..5f83a6a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6593,6 +6593,40 @@ static void __sctp_write_space(struct sctp_association *asoc)
 	}
 }
 
+static void sctp_wake_up_waiters(struct sock *sk,
+				 struct sctp_association *asoc)
+{
+	struct sctp_association *tmp = asoc;
+
+	/* We do accounting for the sndbuf space per association,
+	 * so we only need to wake our own association.
+	 */
+	if (asoc->ep->sndbuf_policy)
+		return __sctp_write_space(asoc);
+
+	/* Accounting for the sndbuf space is per socket, so we
+	 * need to wake up others, try to be fair and in case of
+	 * other associations, let them have a go first instead
+	 * of just doing a sctp_write_space() call.
+	 *
+	 * Note that we reach sctp_wake_up_waiters() only when
+	 * associations free up queued chunks, thus we are under
+	 * lock and the list of associations on a socket is
+	 * guaranteed not to change.
+	 */
+	for (tmp = list_next_entry(tmp, asocs); 1;
+	     tmp = list_next_entry(tmp, asocs)) {
+		/* Manually skip the head element. */
+		if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs))
+			continue;
+		/* Wake up association. */
+		__sctp_write_space(tmp);
+		/* We've reached the end. */
+		if (tmp == asoc)
+			break;
+	}
+}
+
 /* Do accounting for the sndbuf space.
  * Decrement the used sndbuf space of the corresponding association by the
  * data size which was just transmitted(freed).
@@ -6620,7 +6654,7 @@ static void sctp_wfree(struct sk_buff *skb)
 	sk_mem_uncharge(sk, skb->truesize);
 	sock_wfree(skb);
-	__sctp_write_space(asoc);
+	sctp_wake_up_waiters(sk, asoc);
 	sctp_association_put(asoc);
 }
--
cgit v1.1
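
The loop added in sctp_wake_up_waiters() is a reusable pattern: a fair,
full-circle walk over a circular kernel list that starts at the current
node's successor, skips the bare list head, and finishes by processing
the current node last. Extracted as a standalone sketch with
hypothetical types:

    #include <linux/list.h>

    struct item {
            struct list_head node;
    };

    static void process(struct item *it)
    {
            /* stand-in for __sctp_write_space() in the patch */
    }

    static void process_all_fairly(struct list_head *head, struct item *self)
    {
            struct item *tmp = self;

            for (tmp = list_next_entry(tmp, node); 1;
                 tmp = list_next_entry(tmp, node)) {
                    if (&tmp->node == head)
                            continue;       /* skip the bare head element */
                    process(tmp);           /* peers first ... */
                    if (tmp == self)
                            break;          /* ... ourselves last */
            }
    }
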