From 92a0acce186cde8ead56c6915d9479773673ea1a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Feb 2009 21:24:05 -0800
Subject: net: Kill skb_truesize_check(), it only catches false-positives.

A long time ago we had bugs, primarily in TCP, where we would modify
skb->truesize (for TSO queue collapsing) in ways which would corrupt
the socket memory accounting.

skb_truesize_check() was added in order to try and catch this error
more systematically.

However this debugging check has morphed into a Frankenstein of sorts
and these days it does nothing other than catch false-positives.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 8 --------
 net/core/sock.c   | 1 -
 2 files changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index da74b84..c6a6b16 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -143,14 +143,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
-void skb_truesize_bug(struct sk_buff *skb)
-{
-	WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) "
-	       "len=%u, sizeof(sk_buff)=%Zd\n",
-	       skb->truesize, skb->len, sizeof(struct sk_buff));
-}
-EXPORT_SYMBOL(skb_truesize_bug);
-
 /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  *	'private' fields and also do memory statistics to find all the
  *	[BEEP] leaks.
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f2e133..6e4f14d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1137,7 +1137,6 @@ void sock_rfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 
-	skb_truesize_check(skb);
 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 	sk_mem_uncharge(skb->sk, skb->truesize);
 }
-- 
cgit v1.1


From 4aa3b2ee1945ed082430ae1fb988d60eef64ca07 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 18 Feb 2009 15:28:46 +0100
Subject: netfilter: nf_conntrack_ipv6: fix nf_log_packet message in icmpv6
 conntrack

This patch fixes a trivial typo that was adding a newline at the end of
the nf_log_packet() prefix. It also makes the logging conditional by
adding a LOG_INVALID test.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c323643..72dbb6d 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -201,8 +201,9 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
-		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
-			      "nf_ct_icmpv6: ICMPv6 checksum failed\n");
+		if (LOG_INVALID(net, IPPROTO_ICMPV6))
+			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmpv6: ICMPv6 checksum failed ");
 		return -NF_ACCEPT;
 	}
 
-- 
cgit v1.1


From 5ca431f9ae8db8c6edb9c64bebe6d6521077afd6 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 18 Feb 2009 15:29:23 +0100
Subject: netfilter: nfnetlink_log: fix per-rule qthreshold override

In NFLOG, the per-rule qthreshold should override the per-instance one
only if it is set. With the current code, the per-rule qthreshold is 1
if not set, and it overrides the per-instance qthreshold.

This patch modifies the default xt_NFLOG threshold from 1 to
0. Thus a value of 0 means there is no per-rule setting and the instance
parameter has to apply.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_log.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fa49dc7..580b837 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -590,8 +590,10 @@ nfulnl_log_packet(u_int8_t pf,
 
 	qthreshold = inst->qthreshold;
 	/* per-rule qthreshold overrides per-instance */
-	if (qthreshold > li->u.ulog.qthreshold)
-		qthreshold = li->u.ulog.qthreshold;
+	if (li->u.ulog.qthreshold)
+		if (qthreshold > li->u.ulog.qthreshold)
+			qthreshold = li->u.ulog.qthreshold;
+
 
 	switch (inst->copy_mode) {
 	case NFULNL_COPY_META:
-- 
cgit v1.1


From 2c6764b743f9d25dd0806a417f06920dcbd0f599 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 18 Feb 2009 15:29:49 +0100
Subject: netfilter: nfnetlink_log: fix timeout handling

NFLOG timeout was computed in timer by doing:

    flushtimeout*HZ/100

The default value of flushtimeout was HZ (intended as a 1 second delay).
This was wrong on machines where HZ is not 100 (e.g. with HZ=1000 the
delay was 10 seconds). This patch changes the default delay to 100
instead of HZ.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 580b837..c712e9f 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -39,7 +39,7 @@
 #endif
 
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
-#define NFULNL_TIMEOUT_DEFAULT 	HZ	/* every second */
+#define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
 #define NFULNL_COPY_RANGE_MAX	0xFFFF	/* max packet size is limited by 16-bit struct nfattr nfa_len field */
 
-- 
cgit v1.1


From eb132205ca2f7ad44d8c8c482815b6911200b6a0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 18 Feb 2009 16:42:19 +0100
Subject: netfilter: make proc/net/ip* print names from foreign NFPROTO

When extensions were moved to the NFPROTO_UNSPEC wildcard in
ab4f21e6fb1c09b13c4c3cb8357babe8223471bd, they disappeared from the
procfs files.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c | 199 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 142 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index bfbf521..5baccfa 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -827,59 +827,143 @@ static const struct file_operations xt_table_ops = {
 	.release = seq_release_net,
 };
 
-static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
+/*
+ * Traverse state for ip{,6}_{tables,matches} for helping crossing
+ * the multi-AF mutexes.
+ */
+struct nf_mttg_trav {
+	struct list_head *head, *curr;
+	uint8_t class, nfproto;
+};
+
+enum {
+	MTTG_TRAV_INIT,
+	MTTG_TRAV_NFP_UNSPEC,
+	MTTG_TRAV_NFP_SPEC,
+	MTTG_TRAV_DONE,
+};
+
+static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
+    bool is_target)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
+	static const uint8_t next_class[] = {
+		[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
+		[MTTG_TRAV_NFP_SPEC]   = MTTG_TRAV_DONE,
+	};
+	struct nf_mttg_trav *trav = seq->private;
+
+	switch (trav->class) {
+	case MTTG_TRAV_INIT:
+		trav->class = MTTG_TRAV_NFP_UNSPEC;
+		mutex_lock(&xt[NFPROTO_UNSPEC].mutex);
+		trav->head = trav->curr = is_target ?
+			&xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match;
+ 		break;
+	case MTTG_TRAV_NFP_UNSPEC:
+		trav->curr = trav->curr->next;
+		if (trav->curr != trav->head)
+			break;
+		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
+		mutex_lock(&xt[trav->nfproto].mutex);
+		trav->head = trav->curr = is_target ?
+			&xt[trav->nfproto].target : &xt[trav->nfproto].match;
+		trav->class = next_class[trav->class];
+		break;
+	case MTTG_TRAV_NFP_SPEC:
+		trav->curr = trav->curr->next;
+		if (trav->curr != trav->head)
+			break;
+		/* fallthru, _stop will unlock */
+	default:
+		return NULL;
+	}
 
-	mutex_lock(&xt[af].mutex);
-	return seq_list_start(&xt[af].match, *pos);
+	if (ppos != NULL)
+		++*ppos;
+	return trav;
 }
 
-static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
+    bool is_target)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
+	struct nf_mttg_trav *trav = seq->private;
+	unsigned int j;
 
-	return seq_list_next(v, &xt[af].match, pos);
+	trav->class = MTTG_TRAV_INIT;
+	for (j = 0; j < *pos; ++j)
+		if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL)
+			return NULL;
+	return trav;
 }
 
-static void xt_match_seq_stop(struct seq_file *seq, void *v)
+static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
 {
-	struct proc_dir_entry *pde = seq->private;
-	u_int16_t af = (unsigned long)pde->data;
+	struct nf_mttg_trav *trav = seq->private;
+
+	switch (trav->class) {
+	case MTTG_TRAV_NFP_UNSPEC:
+		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
+		break;
+	case MTTG_TRAV_NFP_SPEC:
+		mutex_unlock(&xt[trav->nfproto].mutex);
+		break;
+	}
+}
 
-	mutex_unlock(&xt[af].mutex);
+static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return xt_mttg_seq_start(seq, pos, false);
 }
 
-static int xt_match_seq_show(struct seq_file *seq, void *v)
+static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
 {
-	struct xt_match *match = list_entry(v, struct xt_match, list);
+	return xt_mttg_seq_next(seq, v, ppos, false);
+}
 
-	if (strlen(match->name))
-		return seq_printf(seq, "%s\n", match->name);
-	else
-		return 0;
+static int xt_match_seq_show(struct seq_file *seq, void *v)
+{
+	const struct nf_mttg_trav *trav = seq->private;
+	const struct xt_match *match;
+
+	switch (trav->class) {
+	case MTTG_TRAV_NFP_UNSPEC:
+	case MTTG_TRAV_NFP_SPEC:
+		if (trav->curr == trav->head)
+			return 0;
+		match = list_entry(trav->curr, struct xt_match, list);
+		return (*match->name == '\0') ? 0 :
+		       seq_printf(seq, "%s\n", match->name);
+	}
+	return 0;
 }
 
 static const struct seq_operations xt_match_seq_ops = {
 	.start	= xt_match_seq_start,
 	.next	= xt_match_seq_next,
-	.stop	= xt_match_seq_stop,
+	.stop	= xt_mttg_seq_stop,
 	.show	= xt_match_seq_show,
 };
 
 static int xt_match_open(struct inode *inode, struct file *file)
 {
+	struct seq_file *seq;
+	struct nf_mttg_trav *trav;
 	int ret;
 
-	ret = seq_open(file, &xt_match_seq_ops);
-	if (!ret) {
-		struct seq_file *seq = file->private_data;
+	trav = kmalloc(sizeof(*trav), GFP_KERNEL);
+	if (trav == NULL)
+		return -ENOMEM;
 
-		seq->private = PDE(inode);
+	ret = seq_open(file, &xt_match_seq_ops);
+	if (ret < 0) {
+		kfree(trav);
+		return ret;
 	}
-	return ret;
+
+	seq = file->private_data;
+	seq->private = trav;
+	trav->nfproto = (unsigned long)PDE(inode)->data;
+	return 0;
 }
 
 static const struct file_operations xt_match_ops = {
@@ -887,62 +971,63 @@ static const struct file_operations xt_match_ops = {
 	.open	 = xt_match_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };
 
 static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
-
-	mutex_lock(&xt[af].mutex);
-	return seq_list_start(&xt[af].target, *pos);
+	return xt_mttg_seq_start(seq, pos, true);
 }
 
-static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
-
-	return seq_list_next(v, &xt[af].target, pos);
-}
-
-static void xt_target_seq_stop(struct seq_file *seq, void *v)
-{
-	struct proc_dir_entry *pde = seq->private;
-	u_int16_t af = (unsigned long)pde->data;
-
-	mutex_unlock(&xt[af].mutex);
+	return xt_mttg_seq_next(seq, v, ppos, true);
 }
 
 static int xt_target_seq_show(struct seq_file *seq, void *v)
 {
-	struct xt_target *target = list_entry(v, struct xt_target, list);
-
-	if (strlen(target->name))
-		return seq_printf(seq, "%s\n", target->name);
-	else
-		return 0;
+	const struct nf_mttg_trav *trav = seq->private;
+	const struct xt_target *target;
+
+	switch (trav->class) {
+	case MTTG_TRAV_NFP_UNSPEC:
+	case MTTG_TRAV_NFP_SPEC:
+		if (trav->curr == trav->head)
+			return 0;
+		target = list_entry(trav->curr, struct xt_target, list);
+		return (*target->name == '\0') ? 0 :
+		       seq_printf(seq, "%s\n", target->name);
+	}
+	return 0;
 }
 
 static const struct seq_operations xt_target_seq_ops = {
 	.start	= xt_target_seq_start,
 	.next	= xt_target_seq_next,
-	.stop	= xt_target_seq_stop,
+	.stop	= xt_mttg_seq_stop,
 	.show	= xt_target_seq_show,
 };
 
 static int xt_target_open(struct inode *inode, struct file *file)
 {
+	struct seq_file *seq;
+	struct nf_mttg_trav *trav;
 	int ret;
 
-	ret = seq_open(file, &xt_target_seq_ops);
-	if (!ret) {
-		struct seq_file *seq = file->private_data;
+	trav = kmalloc(sizeof(*trav), GFP_KERNEL);
+	if (trav == NULL)
+		return -ENOMEM;
 
-		seq->private = PDE(inode);
+	ret = seq_open(file, &xt_target_seq_ops);
+	if (ret < 0) {
+		kfree(trav);
+		return ret;
 	}
-	return ret;
+
+	seq = file->private_data;
+	seq->private = trav;
+	trav->nfproto = (unsigned long)PDE(inode)->data;
+	return 0;
 }
 
 static const struct file_operations xt_target_ops = {
@@ -950,7 +1035,7 @@ static const struct file_operations xt_target_ops = {
 	.open	 = xt_target_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };
 
 #define FORMAT_TABLES	"_tables_names"
-- 
cgit v1.1


From 5209921cf15452cbe43097afce11d2846630cb51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 18 Feb 2009 17:45:44 -0800
Subject: tcp: remove obsoleted comment about different passes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is obsolete since the passes got combined.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dda42f0..da2c3b8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2023,7 +2023,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		last_lost = tp->snd_una;
 	}
 
-	/* First pass: retransmit lost packets. */
 	tcp_for_write_queue_from(skb, sk) {
 		__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
-- 
cgit v1.1


From 486a87f1e5624096bd1c09e9e716239597d48dca Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@free.fr>
Date: Sun, 22 Feb 2009 00:07:53 -0800
Subject: netns: fix double free at netns creation

This patch fixes a double free when the creation of a network namespace
fails. The previous code did a kfree of the net_generic structure when
the initialization of one of the subsystems failed.
The 'setup_net' function does kfree(ng) and returns an error.
The caller, 'copy_net_ns', calls net_free on error, and that function
calls kfree(net->gen), so the pointer is freed twice.

This patch makes the code symmetric: net_alloc does the net_generic
allocation and net_free frees the net_generic.

Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 86 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 55151fa..b0767ab 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -32,24 +32,14 @@ static __net_init int setup_net(struct net *net)
 {
 	/* Must be called with net_mutex held */
 	struct pernet_operations *ops;
-	int error;
-	struct net_generic *ng;
+	int error = 0;
 
 	atomic_set(&net->count, 1);
+
 #ifdef NETNS_REFCNT_DEBUG
 	atomic_set(&net->use_count, 0);
 #endif
 
-	error = -ENOMEM;
-	ng = kzalloc(sizeof(struct net_generic) +
-			INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL);
-	if (ng == NULL)
-		goto out;
-
-	ng->len = INITIAL_NET_GEN_PTRS;
-	rcu_assign_pointer(net->gen, ng);
-
-	error = 0;
 	list_for_each_entry(ops, &pernet_list, list) {
 		if (ops->init) {
 			error = ops->init(net);
@@ -70,7 +60,6 @@ out_undo:
 	}
 
 	rcu_barrier();
-	kfree(ng);
 	goto out;
 }
 
@@ -78,16 +67,43 @@ out_undo:
 static struct kmem_cache *net_cachep;
 static struct workqueue_struct *netns_wq;
 
-static struct net *net_alloc(void)
+static struct net_generic *net_alloc_generic(void)
 {
-	return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
+	struct net_generic *ng;
+	size_t generic_size = sizeof(struct net_generic) +
+		INITIAL_NET_GEN_PTRS * sizeof(void *);
+
+	ng = kzalloc(generic_size, GFP_KERNEL);
+	if (ng)
+		ng->len = INITIAL_NET_GEN_PTRS;
+
+	return ng;
 }
 
-static void net_free(struct net *net)
+static struct net *net_alloc(void)
 {
+	struct net *net = NULL;
+	struct net_generic *ng;
+
+	ng = net_alloc_generic();
+	if (!ng)
+		goto out;
+
+	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 	if (!net)
-		return;
+		goto out_free;
+
+	rcu_assign_pointer(net->gen, ng);
+out:
+	return net;
+
+out_free:
+	kfree(ng);
+	goto out;
+}
 
+static void net_free(struct net *net)
+{
 #ifdef NETNS_REFCNT_DEBUG
 	if (unlikely(atomic_read(&net->use_count) != 0)) {
 		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
@@ -112,27 +128,28 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	err = -ENOMEM;
 	new_net = net_alloc();
 	if (!new_net)
-		goto out;
+		goto out_err;
 
 	mutex_lock(&net_mutex);
 	err = setup_net(new_net);
-	if (err)
-		goto out_unlock;
-
-	rtnl_lock();
-	list_add_tail(&new_net->list, &net_namespace_list);
-	rtnl_unlock();
-
-
-out_unlock:
+	if (!err) {
+		rtnl_lock();
+		list_add_tail(&new_net->list, &net_namespace_list);
+		rtnl_unlock();
+	}
 	mutex_unlock(&net_mutex);
+
+	if (err)
+		goto out_free;
 out:
 	put_net(old_net);
-	if (err) {
-		net_free(new_net);
-		new_net = ERR_PTR(err);
-	}
 	return new_net;
+
+out_free:
+	net_free(new_net);
+out_err:
+	new_net = ERR_PTR(err);
+	goto out;
 }
 
 static void cleanup_net(struct work_struct *work)
@@ -188,6 +205,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 
 static int __init net_ns_init(void)
 {
+	struct net_generic *ng;
 	int err;
 
 	printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
@@ -202,6 +220,12 @@ static int __init net_ns_init(void)
 		panic("Could not create netns workq");
 #endif
 
+	ng = net_alloc_generic();
+	if (!ng)
+		panic("Could not allocate generic netns");
+
+	rcu_assign_pointer(init_net.gen, ng);
+
 	mutex_lock(&net_mutex);
 	err = setup_net(&init_net);
 
-- 
cgit v1.1


From ebe47d47b7b7fed72dabcce4717da727b4e2367d Mon Sep 17 00:00:00 2001
From: Clemens Noss <cnoss@gmx.de>
Date: Mon, 23 Feb 2009 15:37:35 -0800
Subject: netns: build fix for net_alloc_generic

net_alloc_generic was defined in #ifdef CONFIG_NET_NS, but used
unconditionally. Move net_alloc_generic out of #ifdef.

Signed-off-by: Clemens Noss <cnoss@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b0767ab..2adb1a7 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -63,10 +63,6 @@ out_undo:
 	goto out;
 }
 
-#ifdef CONFIG_NET_NS
-static struct kmem_cache *net_cachep;
-static struct workqueue_struct *netns_wq;
-
 static struct net_generic *net_alloc_generic(void)
 {
 	struct net_generic *ng;
@@ -80,6 +76,10 @@ static struct net_generic *net_alloc_generic(void)
 	return ng;
 }
 
+#ifdef CONFIG_NET_NS
+static struct kmem_cache *net_cachep;
+static struct workqueue_struct *netns_wq;
+
 static struct net *net_alloc(void)
 {
 	struct net *net = NULL;
-- 
cgit v1.1


From 50fee1dec5d71b8a14c1b82f2f42e16adc227f8b Mon Sep 17 00:00:00 2001
From: Eugene Teo <eugeneteo@kernel.sg>
Date: Mon, 23 Feb 2009 15:38:41 -0800
Subject: net: amend the fix for SO_BSDCOMPAT gsopt infoleak

The fix for CVE-2009-0676 (upstream commit df0bca04) is incomplete. Note
that the same problem of leaking kernel memory will reappear if some
architecture uses a struct timeval with internal padding (for example,
a 64-bit tv_sec and a 32-bit tv_usec) --- in that case the padding bytes
are leaked to userspace.

Signed-off-by: Eugene Teo <eugeneteo@kernel.sg>
Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 6e4f14d..5f97caa 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -696,7 +696,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	if (len < 0)
 		return -EINVAL;
 
-	v.val = 0;
+	memset(&v, 0, sizeof(v));
 
 	switch(optname) {
 	case SO_DEBUG:
-- 
cgit v1.1


From 325fb5b4d26038cba665dd0d8ee09555321061f0 Mon Sep 17 00:00:00 2001
From: Josef Drexler <joe-lk@ttdpatch.net>
Date: Tue, 24 Feb 2009 14:53:12 +0100
Subject: netfilter: xt_recent: fix proc-file addition/removal of IPv4
 addresses

Fix regression introduced by commit 079aa88 (netfilter: xt_recent: IPv6 support):

From http://bugzilla.kernel.org/show_bug.cgi?id=12753:

Problem Description:
An uninitialized buffer causes IPv4 addresses added manually (via the +IP
command to the proc interface) to never match any packets. Similarly, the -IP
command fails to remove IPv4 addresses.

Details:
In the function recent_entry_lookup, the xt_recent module does comparisons of
the entire nf_inet_addr union value, both for IPv4 and IPv6 addresses. For
addresses initialized from actual packets, the remaining 12 bytes not occupied
by the IPv4 address are zeroed, so this works correctly. However, when setting
the nf_inet_addr addr variable in the recent_mt_proc_write function, only the
IPv4 bytes are initialized and the remaining 12 bytes contain garbage.

Hence addresses added in this way never match any packets, unless these
uninitialized 12 bytes happened to be zero by coincidence. Similarly, addresses
cannot consistently be removed using the proc interface due to mismatch of the
garbage bytes (although it will sometimes work to remove an address that was
added manually).

Reading the /proc/net/xt_recent/ entries hides this problem because this only
uses the first 4 bytes when displaying IPv4 addresses.

Steps to reproduce:
$ iptables -I INPUT -m recent --rcheck -j LOG
$ echo +169.254.156.239 > /proc/net/xt_recent/DEFAULT
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910

[At this point no packets from 169.254.156.239 are being logged.]

$ iptables -I INPUT -s 169.254.156.239 -m recent --set
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910
src=169.254.156.239 ttl: 255 last_seen: 126184 oldest_pkt: 4 125434, 125684, 125934, 126184

[At this point, adding the address via an iptables rule, packets are being
logged correctly.]

$ echo -169.254.156.239 > /proc/net/xt_recent/DEFAULT
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910
src=169.254.156.239 ttl: 255 last_seen: 126992 oldest_pkt: 10 125434, 125684, 125934, 126184, 126434, 126684, 126934, 126991, 126991, 126992
$ echo -169.254.156.239 > /proc/net/xt_recent/DEFAULT
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910
src=169.254.156.239 ttl: 255 last_seen: 126992 oldest_pkt: 10 125434, 125684, 125934, 126184, 126434, 126684, 126934, 126991, 126991, 126992

[Removing the address via /proc interface failed evidently.]

Possible solutions:
- initialize the addr variable in recent_mt_proc_write
- compare only 4 bytes for IPv4 addresses in recent_entry_lookup

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index fe80b61..791e030 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -542,7 +542,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
 	struct recent_entry *e;
 	char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
 	const char *c = buf;
-	union nf_inet_addr addr;
+	union nf_inet_addr addr = {};
 	u_int16_t family;
 	bool add, succ;
 
-- 
cgit v1.1


From a52b8bd338630f78a6bfe39fe17cb8469d2679ae Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 24 Feb 2009 16:40:16 -0800
Subject: tcp_scalable: Update malformed & dead url

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_scalable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 2747ec7..4660b08 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -1,6 +1,6 @@
 /* Tom Kelly's Scalable TCP
  *
- * See htt://www-lce.eng.cam.ac.uk/~ctk21/scalable/
+ * See http://www.deneholme.net/tom/scalable/
  *
  * John Heffner <jheffner@sc.edu>
  */
-- 
cgit v1.1


From 3f53a38131a4e7a053c0aa060aba0411242fb6b9 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 26 Feb 2009 03:35:13 -0800
Subject: ipv6: don't use tw net when accounting for recycled tw

We already have a valid net in that place, but this is not just a
cleanup - the tw pointer can be NULL there sometimes, thus causing
an oops in NET_NS=y case.

The same place in ipv4 code already works correctly using existing
net, rather than tw's one.

The bug exists since 2.6.27.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/inet6_hashtables.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 8fe267f..1bcc343 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -258,11 +258,11 @@ unique:
 
 	if (twp != NULL) {
 		*twp = tw;
-		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 	} else if (tw != NULL) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
-- 
cgit v1.1


From 1844f747947bb89d7f12cd3034548805113f764b Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 27 Feb 2009 02:42:38 -0800
Subject: pkt_sched: sch_drr: Fix oops in drr_change_class.

drr_change_class lacks a NULL check of tca[TCA_OPTIONS], so an oops
is possible.

Reported-by: Denys Fedoryschenko <denys@visp.net.lb>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_drr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f6b4fa9..e36e94a 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -66,11 +66,15 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 {
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl = (struct drr_class *)*arg;
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_DRR_MAX + 1];
 	u32 quantum;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy);
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.1


From 4ead443163b798661c2a2ede5e512e116a9e41e7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 1 Mar 2009 00:11:52 -0800
Subject: netpoll: Add drop checks to all entry points

The netpoll entry checks are required to ensure that we don't
receive normal packets when invoked via netpoll.  Unfortunately
it only ever worked for the netif_receive_skb/netif_rx entry
points.  The VLAN (and subsequently GRO) entry point didn't
have the check and therefore can trigger all sorts of weird
problems.

This patch adds the netpoll check to all entry points.

I'm still uneasy with receiving at all under netpoll (which
apparently is only used by the out-of-tree kdump code).  The
reason is it is perfectly legal to receive all data including
headers into highmem if netpoll is off, but if you try to do
that with netpoll on and someone gets a printk in an IRQ handler
you're going to get a nice BUG_ON.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 10 ++++++++++
 net/core/dev.c        |  6 ++++++
 2 files changed, 16 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e9db889..2886d2f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -1,12 +1,16 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
+#include <linux/netpoll.h>
 #include "vlan.h"
 
 /* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
 int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		      u16 vlan_tci, int polling)
 {
+	if (netpoll_rx(skb))
+		return NET_RX_DROP;
+
 	if (skb_bond_should_drop(skb))
 		goto drop;
 
@@ -100,6 +104,9 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 {
 	int err = NET_RX_SUCCESS;
 
+	if (netpoll_receive_skb(skb))
+		return NET_RX_DROP;
+
 	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
 	case -1:
 		return netif_receive_skb(skb);
@@ -126,6 +133,9 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	if (!skb)
 		goto out;
 
+	if (netpoll_receive_skb(skb))
+		goto out;
+
 	err = NET_RX_SUCCESS;
 
 	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
diff --git a/net/core/dev.c b/net/core/dev.c
index a17e006..72b0d26f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2488,6 +2488,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	if (netpoll_receive_skb(skb))
+		return NET_RX_DROP;
+
 	switch (__napi_gro_receive(napi, skb)) {
 	case -1:
 		return netif_receive_skb(skb);
@@ -2558,6 +2561,9 @@ int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
 	if (!skb)
 		goto out;
 
+	if (netpoll_receive_skb(skb))
+		goto out;
+
 	err = NET_RX_SUCCESS;
 
 	switch (__napi_gro_receive(napi, skb)) {
-- 
cgit v1.1


From 9ec06ff57a9badef3b6b019f35efc6b21fc27d03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 1 Mar 2009 00:21:36 -0800
Subject: tcp: fix retrans_out leaks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are conflicting assumptions in shifting: the caller assumes
that a dupsack results in S'ed skbs (or a part of one) for sure, but
never gave a hint to tcp_sacktag_one when DSACK is actually in
use. Thus the DSACK retrans_out -= pcount adjustment was not made and
the counter went out of sync. Remove the obstacle from that information
flow to get DSACKs accounted for in tcp_sacktag_one as expected.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Tested-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a6961d7..c28976a7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1374,7 +1374,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 
 static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 			   struct tcp_sacktag_state *state,
-			   unsigned int pcount, int shifted, int mss)
+			   unsigned int pcount, int shifted, int mss,
+			   int dup_sack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
@@ -1410,7 +1411,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* We discard results */
-	tcp_sacktag_one(skb, sk, state, 0, pcount);
+	tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1561,7 +1562,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
-	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss))
+	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
 		goto out;
 
 	/* Hole filled allows collapsing with the next as well, this is very
@@ -1580,7 +1581,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	len = skb->len;
 	if (skb_shift(prev, skb, len)) {
 		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss);
+		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
 	}
 
 out:
-- 
cgit v1.1