From 782a6675119c76c071e74e2ddd98268f47770cba Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 3 Aug 2006 23:54:41 +1000
Subject: [PATCH] Send wireless netlink events with a clean slate

Drivers expect to be able to call wireless_send_event in arbitrary
contexts.  On the other hand, netlink really doesn't like being
invoked in an IRQ context.  So we need to postpone the sending of
netlink skb's to a tasklet.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/core/wireless.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/wireless.c b/net/core/wireless.c
index d2bc72d..de0bde4 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -82,6 +82,7 @@
 #include <linux/init.h>			/* for __init */
 #include <linux/if_arp.h>		/* ARPHRD_ETHER */
 #include <linux/etherdevice.h>		/* compare_ether_addr */
+#include <linux/interrupt.h>
 
 #include <linux/wireless.h>		/* Pretty obvious */
 #include <net/iw_handler.h>		/* New driver API */
@@ -1842,6 +1843,18 @@ int wireless_rtnetlink_set(struct net_device *	dev,
  */
 
 #ifdef WE_EVENT_RTNETLINK
+static struct sk_buff_head wireless_nlevent_queue;
+
+static void wireless_nlevent_process(unsigned long data)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
+		netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
+}
+
+static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
+
 /* ---------------------------------------------------------------- */
 /*
  * Fill a rtnetlink message with our event data.
@@ -1904,8 +1917,17 @@ static inline void rtmsg_iwinfo(struct net_device *	dev,
 		return;
 	}
 	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC);
+	skb_queue_tail(&wireless_nlevent_queue, skb);
+	tasklet_schedule(&wireless_nlevent_tasklet);
+}
+
+static int __init wireless_nlevent_init(void)
+{
+	skb_queue_head_init(&wireless_nlevent_queue);
+	return 0;
 }
+
+subsys_initcall(wireless_nlevent_init);
 #endif	/* WE_EVENT_RTNETLINK */
 
 /* ---------------------------------------------------------------- */
-- 
cgit v1.1


From 7b2e497a06c0e93719fda88820e057b635e8fae2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 7 Aug 2006 16:09:04 -0700
Subject: [NET]: Assign skb->dev in netdev_alloc_skb

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 022d889..c54f366 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -268,8 +268,10 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 	struct sk_buff *skb;
 
 	skb = alloc_skb(length + NET_SKB_PAD, gfp_mask);
-	if (likely(skb))
+	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
+		skb->dev = dev;
+	}
 	return skb;
 }
 
-- 
cgit v1.1


From 8b5cc5ef40c83c6ea4c90b203bb2c8b17edfa11b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Mon, 7 Aug 2006 20:09:20 -0700
Subject: [IPX]: Header length validation needed

This patch will linearize and check there is enough data.
It handles the pprop case as well as avoiding a whole audit of
the routing code.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipx/af_ipx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index aa34ff4..c13e86b 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1646,7 +1646,8 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty
 	ipx_pktsize	= ntohs(ipx->ipx_pktsize);
 	
 	/* Too small or invalid header? */
-	if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len)
+	if (ipx_pktsize < sizeof(struct ipxhdr) ||
+	    !pskb_may_pull(skb, ipx_pktsize))
 		goto drop;
                         
 	if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
-- 
cgit v1.1


From 8d1502de27c46b365b5c86e17d173083d3d6c9ac Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Mon, 7 Aug 2006 20:44:22 -0700
Subject: [IPV4]: Limit rt cache size properly.

From: Kirill Korotaev <dev@sw.ru>

During OpenVZ stress testing we found that UDP traffic with random src
can cause excessive growth of the rt hash, eventually leading to OOM
and kernel panics.

It was found that for a 4GB i686 system (having 1048576 total pages and
  225280 normal zone pages) the kernel allocates the following route hash:
syslog: IP route cache hash table entries: 262144 (order: 8, 1048576
bytes) => ip_rt_max_size = 4194304 entries, i.e.  max rt size is
4194304 * 256b = 1GB of RAM > normal_zone

The attached patch removes the HASH_HIGHMEM flag from the
alloc_large_system_hash() call.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 19bd49d..b873cbc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3157,7 +3157,7 @@ int __init ip_rt_init(void)
 					rhash_entries,
 					(num_physpages >= 128 * 1024) ?
 					15 : 17,
-					HASH_HIGHMEM,
+					0,
 					&rt_hash_log,
 					&rt_hash_mask,
 					0);
-- 
cgit v1.1


From aaf580601ff244df82324fff380ed6740f27ef03 Mon Sep 17 00:00:00 2001
From: Chen-Li Tien <cltien@gmail.com>
Date: Mon, 7 Aug 2006 20:49:07 -0700
Subject: [PKTGEN]: Fix oops when used with balance-tlb bonding

Signed-off-by: Chen-Li Tien <cltien@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 67ed14d..b174337 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2149,6 +2149,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32);
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
+	skb->nh.iph = iph;
+	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-- 
cgit v1.1


From 69d8c28c9578ce78b3dc1b9be36926d962282898 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 7 Aug 2006 20:52:10 -0700
Subject: [PKTGEN]: Make sure skb->{nh,h} are initialized in fill_packet_ipv6()
 too.

Mirror the bug fix from fill_packet_ipv4()

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index b174337..6a7320b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2462,6 +2462,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->protocol = protocol;
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
+	skb->nh.ipv6h = iph;
+	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
-- 
cgit v1.1


From bd37a088596ccdb2b2dd3299e25e333bca7a9a34 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@nanjing-fnst.com>
Date: Mon, 7 Aug 2006 21:04:15 -0700
Subject: [TCP]: SNMPv2 tcpOutSegs counter error

Do not count retransmitted segments.

Signed-off-by: Wei Yongjun <yjwei@nanjing-fnst.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5c08ea2..507adef 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -466,7 +466,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, skb, sk);
 
-	TCP_INC_STATS(TCP_MIB_OUTSEGS);
+	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
+		TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
 	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 	if (likely(err <= 0))
@@ -2157,10 +2158,9 @@ int tcp_connect(struct sock *sk)
 	skb_shinfo(buff)->gso_size = 0;
 	skb_shinfo(buff)->gso_type = 0;
 	buff->csum = 0;
+	tp->snd_nxt = tp->write_seq;
 	TCP_SKB_CB(buff)->seq = tp->write_seq++;
 	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
-	tp->snd_nxt = tp->write_seq;
-	tp->pushed_seq = tp->write_seq;
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2170,6 +2170,12 @@ int tcp_connect(struct sock *sk)
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
+
+	/* We change tp->snd_nxt after the tcp_transmit_skb() call
+	 * in order to make this packet get counted in tcpOutSegs.
+	 */
+	tp->snd_nxt = tp->write_seq;
+	tp->pushed_seq = tp->write_seq;
 	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
 
 	/* Timer for repeating the SYN until an answer. */
-- 
cgit v1.1


From 70f8e78e150425b01c1099087ad3decacf7e4ccf Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 8 Aug 2006 16:47:37 -0700
Subject: [RTNETLINK]: Fix IFLA_ADDRESS handling.

The ->set_mac_address handlers expect a pointer to a
sockaddr which contains the MAC address, whereas
IFLA_ADDRESS provides just the MAC address itself.

So whip up a sockaddr to wrap around the netlink
attribute for the ->set_mac_address call.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 20e5bb7..30cc1ba 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -394,6 +394,9 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	}
 
 	if (ida[IFLA_ADDRESS - 1]) {
+		struct sockaddr *sa;
+		int len;
+
 		if (!dev->set_mac_address) {
 			err = -EOPNOTSUPP;
 			goto out;
@@ -405,7 +408,17 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
 			goto out;
 
-		err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
+		len = sizeof(sa_family_t) + dev->addr_len;
+		sa = kmalloc(len, GFP_KERNEL);
+		if (!sa) {
+			err = -ENOMEM;
+			goto out;
+		}
+		sa->sa_family = dev->type;
+		memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]),
+		       dev->addr_len);
+		err = dev->set_mac_address(dev, sa);
+		kfree(sa);
 		if (err)
 			goto out;
 		send_addr_notify = 1;
-- 
cgit v1.1


From 7b1ba8de569460894efa892457af7a37c0d574f9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 8 Aug 2006 16:48:51 -0700
Subject: [IPX]: Another nonlinear receive fix

Need to check some more cases in IPX receive.  If the skb is purely
fragments, the IPX header needs to be extracted. The function
pskb_may_pull() may in theory invalidate all the pointers in the skb,
so references to ipx header must be refreshed.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipx/af_ipx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index c13e86b..40196420 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1642,14 +1642,17 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty
 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
 		goto out;
 
-	ipx		= ipx_hdr(skb);
-	ipx_pktsize	= ntohs(ipx->ipx_pktsize);
+	if (!pskb_may_pull(skb, sizeof(struct ipxhdr)))
+		goto drop;
+
+	ipx_pktsize = ntohs(ipxhdr(skb)->ipx_pktsize);
 	
 	/* Too small or invalid header? */
 	if (ipx_pktsize < sizeof(struct ipxhdr) ||
 	    !pskb_may_pull(skb, ipx_pktsize))
 		goto drop;
                         
+	ipx = ipx_hdr(skb);
 	if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
 	   ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize))
 		goto drop;
-- 
cgit v1.1


From 7c91767a6b701543c93ebcd611dab61deff3dad1 Mon Sep 17 00:00:00 2001
From: Dmitry Mishin <dim@openvz.org>
Date: Wed, 9 Aug 2006 02:25:54 -0700
Subject: [NET]: add_timer -> mod_timer() in dst_run_gc()

Patch from Dmitry Mishin <dim@openvz.org>:

Replace add_timer() by mod_timer() in dst_run_gc
in order to avoid BUG message.

       CPU1                            CPU2
dst_run_gc()  entered           dst_run_gc() entered
spin_lock(&dst_lock)                   .....
del_timer(&dst_gc_timer)         fail to get lock
       ....                         mod_timer() <--- puts
                                                 timer back
                                                 to the list
add_timer(&dst_gc_timer) <--- BUG because timer is in list already.

Found during OpenVZ internal testing.

At first we thought that it was OpenVZ specific, as we
added a dst_run_gc(0) call in dst_dev_event(),
but as Alexey pointed out to me, it is possible to trigger
this condition in the mainstream kernel.

For example, the timer has fired on CPU2, but the handler was preempted
by an irq before dst_lock is tried.
Meanwhile, someone on CPU1 adds an entry to the gc list and
starts the timer.
If CPU2 was preempted long enough, this timer can expire
simultaneously with the resuming timer handler on CPU1, arriving
at exactly the situation described.

Signed-off-by: Dmitry Mishin <dim@openvz.org>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index 470c05b..1a5e49d 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dummy)
 		dst_gc_timer_inc = DST_GC_INC;
 		dst_gc_timer_expires = DST_GC_MIN;
 	}
-	dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
 #if RT_CACHE_DEBUG >= 2
 	printk("dst_total: %d/%d %ld\n",
 	       atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
 #endif
-	add_timer(&dst_gc_timer);
+	mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
 
 out:
 	spin_unlock(&dst_lock);
-- 
cgit v1.1


From 06aebfb7faa13258af5230ff3d1587ece6c0250e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 9 Aug 2006 16:52:04 -0700
Subject: [IPV6]: The ifa lock is a BH lock

The ifa lock is expected to be taken in BH context (by addrconf timers)
so we must disable BH when accessing it from user context.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8ea1e36..0c5042e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1909,11 +1909,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
 	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
 
 	if (!IS_ERR(ifp)) {
-		spin_lock(&ifp->lock);
+		spin_lock_bh(&ifp->lock);
 		ifp->valid_lft = valid_lft;
 		ifp->prefered_lft = prefered_lft;
 		ifp->tstamp = jiffies;
-		spin_unlock(&ifp->lock);
+		spin_unlock_bh(&ifp->lock);
 
 		addrconf_dad_start(ifp, 0);
 		in6_ifa_put(ifp);
-- 
cgit v1.1


From fff642570dc47ab76491fe81ee6599269c4eb13e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 9 Aug 2006 17:36:15 -0700
Subject: [IPX]: Fix typo, ipxhdr() --> ipx_hdr()

Noticed by Dave Jones.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipx/af_ipx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 40196420..bef3f61 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1645,7 +1645,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty
 	if (!pskb_may_pull(skb, sizeof(struct ipxhdr)))
 		goto drop;
 
-	ipx_pktsize = ntohs(ipxhdr(skb)->ipx_pktsize);
+	ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize);
 	
 	/* Too small or invalid header? */
 	if (ipx_pktsize < sizeof(struct ipxhdr) ||
-- 
cgit v1.1


From 18b6fe64d4d1f6e0a2c71429a5e5074f43e29203 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 13 Aug 2006 18:05:09 -0700
Subject: [TCP]: Fix botched memory leak fix to tcpprobe_read().

Somehow I clobbered James's original fix and only my
subsequent compiler warning change went in for that
changeset.

Get the real fix in there.

Noticed by Jesper Juhl.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_probe.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index b343532..dab37d2 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
 	error = wait_event_interruptible(tcpw.wait,
 					 __kfifo_len(tcpw.fifo) != 0);
 	if (error)
-		return error;
+		goto out_free;
 
 	cnt = kfifo_get(tcpw.fifo, tbuf, len);
 	error = copy_to_user(buf, tbuf, cnt);
 
+out_free:
 	vfree(tbuf);
 
 	return error ? error : cnt;
-- 
cgit v1.1


From 97c802a113989800430a981b6f36b14c62163d37 Mon Sep 17 00:00:00 2001
From: Phil Oester <kernel@linuxace.com>
Date: Sun, 13 Aug 2006 18:05:35 -0700
Subject: [NETFILTER]: xt_string: fix negation

The xt_string match is broken with ! negation.
This resolves a portion of netfilter bugzilla #497.

Signed-off-by: Phil Oester <kernel@linuxace.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_string.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index d8e3891..275330f 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -37,7 +37,7 @@ static int match(const struct sk_buff *skb,
 
 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset, 
 			     conf->to_offset, conf->config, &state) 
-			     != UINT_MAX) && !conf->invert;
+			     != UINT_MAX) ^ conf->invert;
 }
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m)
-- 
cgit v1.1


From 1c7628bd7a458faf7c96ef521f6d3a5ea9b106b8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 13 Aug 2006 18:06:02 -0700
Subject: [NETFILTER]: xt_hashlimit: fix limit off-by-one

Hashlimit doesn't account for the first packet, which is inconsistent
with the limit match.

Reported by ryan.castellucci@gmail.com, netfilter bugzilla #500.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_hashlimit.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 6b66244..3bd2368 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -454,15 +454,12 @@ hashlimit_match(const struct sk_buff *skb,
 		dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * 
 							hinfo->cfg.burst);
 		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-
-		spin_unlock_bh(&hinfo->lock);
-		return 1;
+	} else {
+		/* update expiration timeout */
+		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
+		rateinfo_recalc(dh, now);
 	}
 
-	/* update expiration timeout */
-	dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-
-	rateinfo_recalc(dh, now);
 	if (dh->rateinfo.credit >= dh->rateinfo.cost) {
 		/* We're underlimit. */
 		dh->rateinfo.credit -= dh->rateinfo.cost;
-- 
cgit v1.1


From d49c73c729e2ef644558a1f441c044bfacdc9744 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sun, 13 Aug 2006 18:55:53 -0700
Subject: [IPSEC]: Validate properly in xfrm_dst_check()

If dst->obsolete is -1, this is a signal from the
bundle creator that we want the XFRM dst and the
dsts that it references to be validated on every
use.

I misunderstood this intention when I changed
xfrm_dst_check() to always return NULL.

Now, when we purge a dst entry by running dst_free()
on it, this will set dst->obsolete to a positive
integer, and we want to return NULL in that case so
that the socket does a relookup for the route.

Thus, if dst->obsolete<0, let stale_bundle() validate
the state, else always return NULL.

In general, we need to do things more intelligently
here because we flush too much state during rule
changes.  Herbert Xu has some ideas wherein the key
manager gives us some help in this area.  We can also
use smarter state management algorithms inside of
the kernel as well.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f35bc676..3da67ca 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1134,12 +1134,33 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 }
 EXPORT_SYMBOL(__xfrm_route_forward);
 
+/* Optimize later using cookies and generation ids. */
+
 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 {
-	/* If it is marked obsolete, which is how we even get here,
-	 * then we have purged it from the policy bundle list and we
-	 * did that for a good reason.
+	/* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
+	 * to "-1" to force all XFRM destinations to get validated by
+	 * dst_ops->check on every use.  We do this because when a
+	 * normal route referenced by an XFRM dst is obsoleted we do
+	 * not go looking around for all parent referencing XFRM dsts
+	 * so that we can invalidate them.  It is just too much work.
+	 * Instead we make the checks here on every use.  For example:
+	 *
+	 *	XFRM dst A --> IPv4 dst X
+	 *
+	 * X is the "xdst->route" of A (X is also the "dst->path" of A
+	 * in this example).  If X is marked obsolete, "A" will not
+	 * notice.  That's what we are validating here via the
+	 * stale_bundle() check.
+	 *
+	 * When a policy's bundle is pruned, we dst_free() the XFRM
+	 * dst which causes it's ->obsolete field to be set to a
+	 * positive non-zero integer.  If an XFRM dst has been pruned
+	 * like this, we want to force a new route lookup.
 	 */
+	if (dst->obsolete < 0 && !stale_bundle(dst))
+		return dst;
+
 	return NULL;
 }
 
-- 
cgit v1.1


From 7ee66fcb94cb8be77d5f34cce7d315d11759f9c1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Sun, 13 Aug 2006 18:56:26 -0700
Subject: [LLC]: multicast receive device match

Fix from Aji_Srinivas@emc.com, STP packets are incorrectly received on
all LLC datagram sockets, whichever interface they are bound to.  The
llc_sap datagram receive logic sends packets with a unicast
destination MAC to one socket bound to that SAP and MAC, and multicast
packets to all sockets bound to that SAP. STP packets are multicast,
and we do need to know on which interface they were received.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_sap.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 42eb0c3..61cb8cf 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -330,6 +330,9 @@ static void llc_sap_mcast(struct llc_sap *sap,
 		if (llc->laddr.lsap != laddr->lsap)
 			continue;
 
+		if (llc->dev != skb->dev)
+			continue;
+
 		skb1 = skb_clone(skb, GFP_ATOMIC);
 		if (!skb1)
 			break;
-- 
cgit v1.1


From 0eff66e625306a794ecba4b29ed12f7a147ce219 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 13 Aug 2006 18:57:28 -0700
Subject: [NETFILTER]: {arp,ip,ip6}_tables: proper error recovery in init path

None of {arp,ip,ip6}_tables cleans up after itself when something goes
wrong during initialization.

Noticed by Rennie deGraaf <degraaf@cpsc.ucalgary.ca>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 27 ++++++++++++++++++++-------
 net/ipv4/netfilter/ip_tables.c  | 33 +++++++++++++++++++++++++--------
 net/ipv6/netfilter/ip6_tables.c | 34 +++++++++++++++++++++++++---------
 3 files changed, 70 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 80c73ca..df4854c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1170,21 +1170,34 @@ static int __init arp_tables_init(void)
 {
 	int ret;
 
-	xt_proto_init(NF_ARP);
+	ret = xt_proto_init(NF_ARP);
+	if (ret < 0)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(&arpt_standard_target);
-	xt_register_target(&arpt_error_target);
+	ret = xt_register_target(&arpt_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(&arpt_error_target);
+	if (ret < 0)
+		goto err3;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&arpt_sockopts);
-	if (ret < 0) {
-		duprintf("Unable to register sockopts.\n");
-		return ret;
-	}
+	if (ret < 0)
+		goto err4;
 
 	printk("arp_tables: (C) 2002 David S. Miller\n");
 	return 0;
+
+err4:
+	xt_unregister_target(&arpt_error_target);
+err3:
+	xt_unregister_target(&arpt_standard_target);
+err2:
+	xt_proto_fini(NF_ARP);
+err1:
+	return ret;
 }
 
 static void __exit arp_tables_fini(void)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fc5bdd5..f316ff5 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2239,22 +2239,39 @@ static int __init ip_tables_init(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET);
+	ret = xt_proto_init(AF_INET);
+	if (ret < 0)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(&ipt_standard_target);
-	xt_register_target(&ipt_error_target);
-	xt_register_match(&icmp_matchstruct);
+	ret = xt_register_target(&ipt_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(&ipt_error_target);
+	if (ret < 0)
+		goto err3;
+	ret = xt_register_match(&icmp_matchstruct);
+	if (ret < 0)
+		goto err4;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ipt_sockopts);
-	if (ret < 0) {
-		duprintf("Unable to register sockopts.\n");
-		return ret;
-	}
+	if (ret < 0)
+		goto err5;
 
 	printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+err5:
+	xt_unregister_match(&icmp_matchstruct);
+err4:
+	xt_unregister_target(&ipt_error_target);
+err3:
+	xt_unregister_target(&ipt_standard_target);
+err2:
+	xt_proto_fini(AF_INET);
+err1:
+	return ret;
 }
 
 static void __exit ip_tables_fini(void)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f26898b..c9d6b23 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1398,23 +1398,39 @@ static int __init ip6_tables_init(void)
 {
 	int ret;
 
-	xt_proto_init(AF_INET6);
+	ret = xt_proto_init(AF_INET6);
+	if (ret < 0)
+		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	xt_register_target(&ip6t_standard_target);
-	xt_register_target(&ip6t_error_target);
-	xt_register_match(&icmp6_matchstruct);
+	ret = xt_register_target(&ip6t_standard_target);
+	if (ret < 0)
+		goto err2;
+	ret = xt_register_target(&ip6t_error_target);
+	if (ret < 0)
+		goto err3;
+	ret = xt_register_match(&icmp6_matchstruct);
+	if (ret < 0)
+		goto err4;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&ip6t_sockopts);
-	if (ret < 0) {
-		duprintf("Unable to register sockopts.\n");
-		xt_proto_fini(AF_INET6);
-		return ret;
-	}
+	if (ret < 0)
+		goto err5;
 
 	printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
 	return 0;
+
+err5:
+	xt_unregister_match(&icmp6_matchstruct);
+err4:
+	xt_unregister_target(&ip6t_error_target);
+err3:
+	xt_unregister_target(&ip6t_standard_target);
+err2:
+	xt_proto_fini(AF_INET6);
+err1:
+	return ret;
 }
 
 static void __exit ip6_tables_fini(void)
-- 
cgit v1.1


From dcb7cd97f133f7cfbd181149a1e60215a869f895 Mon Sep 17 00:00:00 2001
From: Mark Huang <mlhuang@cs.princeton.edu>
Date: Sun, 13 Aug 2006 18:57:54 -0700
Subject: [NETFILTER]: ulog: fix panic on SMP kernels

Fix kernel panic on various SMP machines. The culprit is a null
ub->skb in ulog_send(). If ulog_timer() has already been scheduled on
one CPU and is spinning on the lock, and ipt_ulog_packet() flushes the
queue on another CPU by calling ulog_send() right before it exits,
there will be no skbuff when ulog_timer() acquires the lock and calls
ulog_send(). Cancelling the timer in ulog_send() doesn't help because
it has already been scheduled and is running on the first CPU.

Similar problem exists in ebt_ulog.c and nfnetlink_log.c.

Signed-off-by: Mark Huang <mlhuang@cs.princeton.edu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_ulog.c | 3 +++
 net/ipv4/netfilter/ipt_ULOG.c   | 5 +++++
 net/netfilter/nfnetlink_log.c   | 3 +++
 3 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 02693a2..9f950db 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -74,6 +74,9 @@ static void ulog_send(unsigned int nlgroup)
 	if (timer_pending(&ub->timer))
 		del_timer(&ub->timer);
 
+	if (!ub->skb)
+		return;
+
 	/* last nlmsg needs NLMSG_DONE */
 	if (ub->qlen > 1)
 		ub->lastnlh->nlmsg_type = NLMSG_DONE;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d7dd7fe..d46fd67 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -115,6 +115,11 @@ static void ulog_send(unsigned int nlgroupnum)
 		del_timer(&ub->timer);
 	}
 
+	if (!ub->skb) {
+		DEBUGP("ipt_ULOG: ulog_send: nothing to send\n");
+		return;
+	}
+
 	/* last nlmsg needs NLMSG_DONE */
 	if (ub->qlen > 1)
 		ub->lastnlh->nlmsg_type = NLMSG_DONE;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 61cdda4..b59d3b2 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -366,6 +366,9 @@ __nfulnl_send(struct nfulnl_instance *inst)
 	if (timer_pending(&inst->timer))
 		del_timer(&inst->timer);
 
+	if (!inst->skb)
+		return 0;
+
 	if (inst->qlen > 1)
 		inst->lastnlh->nlmsg_type = NLMSG_DONE;
 
-- 
cgit v1.1


From e9fa4f7bd291c29a785666e2fa5a9cf3241ee6c3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 13 Aug 2006 20:12:58 -0700
Subject: [INET]: Use pskb_trim_unique when trimming paged unique skbs

The IPv4/IPv6 datagram output path was using skb_trim to trim paged
packets because they know that the packet has not been cloned yet
(since the packet hasn't been given to anything else in the system).

This broke because skb_trim no longer allows paged packets to be
trimmed.  Paged packets must be given to one of the pskb_trim functions
instead.

This patch adds a new pskb_trim_unique function to cover the IPv4/IPv6
datagram output path scenario and replaces the corresponding skb_trim
calls with it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 4 ++--
 net/ipv6/ip6_output.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9bf307a..4c20f55 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -947,7 +947,7 @@ alloc_new_skb:
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				data += fraggap;
-				skb_trim(skb_prev, maxfraglen);
+				pskb_trim_unique(skb_prev, maxfraglen);
 			}
 
 			copy = datalen - transhdrlen - fraggap;
@@ -1142,7 +1142,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 					data, fraggap, 0);
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
-				skb_trim(skb_prev, maxfraglen);
+				pskb_trim_unique(skb_prev, maxfraglen);
 			}
 
 			/*
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 69451af..4fb47a2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1095,7 +1095,7 @@ alloc_new_skb:
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				data += fraggap;
-				skb_trim(skb_prev, maxfraglen);
+				pskb_trim_unique(skb_prev, maxfraglen);
 			}
 			copy = datalen - transhdrlen - fraggap;
 			if (copy < 0) {
-- 
cgit v1.1


From 7ea49ed73c8d0d0bdf7c11fc18c61572d2d22176 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 14 Aug 2006 17:08:36 -0700
Subject: [VLAN]: Make sure bonding packet drop checks get done in hwaccel RX
 path.

Since __vlan_hwaccel_rx() is essentially bypassing the
netif_receive_skb() call that would have occurred if we did the VLAN
decapsulation in software, we are missing the skb_bond() call and the
associated checks it does.

Export those checks via an inline function, skb_bond_should_drop(),
and use this in __vlan_hwaccel_rx().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index d95e262..9fe96cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1619,26 +1619,10 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
 	struct net_device *dev = skb->dev;
 
 	if (dev->master) {
-		/*
-		 * On bonding slaves other than the currently active
-		 * slave, suppress duplicates except for 802.3ad
-		 * ETH_P_SLOW and alb non-mcast/bcast.
-		 */
-		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-			if (dev->master->priv_flags & IFF_MASTER_ALB) {
-				if (skb->pkt_type != PACKET_BROADCAST &&
-				    skb->pkt_type != PACKET_MULTICAST)
-					goto keep;
-			}
-
-			if (dev->master->priv_flags & IFF_MASTER_8023AD &&
-			    skb->protocol == __constant_htons(ETH_P_SLOW))
-				goto keep;
-		
+		if (skb_bond_should_drop(skb)) {
 			kfree_skb(skb);
 			return NULL;
 		}
-keep:
 		skb->dev = dev->master;
 	}
 
-- 
cgit v1.1


From 855751125093f758871b70da2951d8b92b6368cc Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 15 Aug 2006 00:03:01 -0700
Subject: [NET]: Fix potential stack overflow in net/core/utils.c

On High end systems (1024 or so cpus) this can potentially cause stack
overflow.  Fix the stack usage.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/utils.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/utils.c b/net/core/utils.c
index 4f96f38..e31c90e 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -130,12 +130,13 @@ void __init net_random_init(void)
 static int net_random_reseed(void)
 {
 	int i;
-	unsigned long seed[NR_CPUS];
+	unsigned long seed;
 
-	get_random_bytes(seed, sizeof(seed));
 	for_each_possible_cpu(i) {
 		struct nrnd_state *state = &per_cpu(net_rand_state,i);
-		__net_srandom(state, seed[i]);
+
+		get_random_bytes(&seed, sizeof(seed));
+		__net_srandom(state, seed);
 	}
 	return 0;
 }
-- 
cgit v1.1


From deb47c66e12a645f7eec9b1c153c05ed47989439 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Tue, 15 Aug 2006 00:04:56 -0700
Subject: [NETFILTER]: xt_physdev build fix

It needs netfilter_bridge.h for brnf_deferred_hooks

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_physdev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index a9f4f6f..63a9654 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/netfilter_bridge.h>
 #include <linux/netfilter/xt_physdev.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge.h>
-- 
cgit v1.1


From 640c41c77a96dbbfb74d40ae86ab75b759afb911 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 15 Aug 2006 00:06:56 -0700
Subject: [IPV6] lockdep: annotate __icmpv6_socket

Split off __icmpv6_socket's sk->sk_dst_lock class, because it gets
used from softirqs, which is safe for __icmpv6_sockets (because they
never get directly used via userspace syscalls), but unsafe for normal
sockets.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1044b6f..3d6e9a3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -712,6 +712,11 @@ discard_it:
 	return 0;
 }
 
+/*
+ * Special lock-class for __icmpv6_socket:
+ */
+static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
+
 int __init icmpv6_init(struct net_proto_family *ops)
 {
 	struct sock *sk;
@@ -730,6 +735,14 @@ int __init icmpv6_init(struct net_proto_family *ops)
 
 		sk = per_cpu(__icmpv6_socket, i)->sk;
 		sk->sk_allocation = GFP_ATOMIC;
+		/*
+		 * Split off their lock-class, because sk->sk_dst_lock
+		 * gets used from softirqs, which is safe for
+		 * __icmpv6_socket (because those never get directly used
+		 * via userspace syscalls), but unsafe for normal sockets.
+		 */
+		lockdep_set_class(&sk->sk_dst_lock,
+				  &icmpv6_socket_sk_dst_lock_key);
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
-- 
cgit v1.1


From bb699cbca0096aa3f5f750264ec0af080732375a Mon Sep 17 00:00:00 2001
From: Michal Ruzicka <michal.ruzicka@comstar.cz>
Date: Tue, 15 Aug 2006 00:20:17 -0700
Subject: [IPV4]: Possible leak of multicast source filter structure

There is a leak of a socket's multicast source filter list structure
on closing a socket with a multicast source filter set on an interface
that does not exist any more.

Signed-off-by: Michal Ruzicka <michal.ruzicka@comstar.cz>
Acked-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9f4b752..e981369 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2199,13 +2199,13 @@ void ip_mc_drop_socket(struct sock *sk)
 		struct in_device *in_dev;
 		inet->mc_list = iml->next;
 
-		if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) {
-			(void) ip_mc_leave_src(sk, iml, in_dev);
+		in_dev = inetdev_by_index(iml->multi.imr_ifindex);
+		(void) ip_mc_leave_src(sk, iml, in_dev);
+		if (in_dev != NULL) {
 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
 			in_dev_put(in_dev);
 		}
 		sock_kfree_s(sk, iml, sizeof(*iml));
-
 	}
 	rtnl_unlock();
 }
-- 
cgit v1.1


From b9c6e3e96669ade31afd3a39f17393e577b609c5 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@mvista.com>
Date: Tue, 15 Aug 2006 02:02:33 -0700
Subject: [ATM]: Compile error on ARM

atm_proc_exit() is declared as __exit, and thus in .exit.text.  On
some architectures (ARM) .exit.text is discarded at compile time, and
since atm_proc_exit() is called by some other __init functions, it
results in a link error.

Signed-off-by: Kevin Hilman <khilman@mvista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/atm/proc.c b/net/atm/proc.c
index 3f95b08..91fe5f5 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -507,7 +507,7 @@ err_out:
 	goto out;
 }
 
-void __exit atm_proc_exit(void)
+void atm_proc_exit(void)
 {
 	atm_proc_dirs_remove();
 }
-- 
cgit v1.1


From c0956bd25161bff45304d482cda51ca4b3b572f1 Mon Sep 17 00:00:00 2001
From: Ralf Hildebrandt <Ralf.Hildebrandt@charite.de>
Date: Tue, 15 Aug 2006 02:12:43 -0700
Subject: [PKT_SCHED] cls_u32: Fix typo.

Signed-off-by: Ralf Hildebrandt <Ralf.Hildebrandt@charite.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_u32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index eea3669..0a6cfa0 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -796,7 +796,7 @@ static int __init init_u32(void)
 {
 	printk("u32 classifier\n");
 #ifdef CONFIG_CLS_U32_PERF
-	printk("    Perfomance counters on\n");
+	printk("    Performance counters on\n");
 #endif
 #ifdef CONFIG_NET_CLS_POLICE
 	printk("    OLD policer on \n");
-- 
cgit v1.1


From c7fa9d189e93877a1fa08ab00f230e0689125e45 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 15 Aug 2006 16:34:13 -0700
Subject: [NET]: Disallow whitespace in network device names.

It causes way too much trouble and confusion in userspace.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 9fe96cd..d4a1ec3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -116,6 +116,7 @@
 #include <linux/audit.h>
 #include <linux/dmaengine.h>
 #include <linux/err.h>
+#include <linux/ctype.h>
 
 /*
  *	The list of packet types we will receive (as opposed to discard)
@@ -632,14 +633,22 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas
  *	@name: name string
  *
  *	Network device names need to be valid file names to
- *	to allow sysfs to work
+ *	to allow sysfs to work.  We also disallow any kind of
+ *	whitespace.
  */
 int dev_valid_name(const char *name)
 {
-	return !(*name == '\0' 
-		 || !strcmp(name, ".")
-		 || !strcmp(name, "..")
-		 || strchr(name, '/'));
+	if (*name == '\0')
+		return 0;
+	if (!strcmp(name, ".") || !strcmp(name, ".."))
+		return 0;
+
+	while (*name) {
+		if (*name == '/' || isspace(*name))
+			return 0;
+		name++;
+	}
+	return 1;
 }
 
 /**
-- 
cgit v1.1


From acd6e00b8e4db542cb6bc9ddfbb4e18bbe29ce4d Mon Sep 17 00:00:00 2001
From: David L Stevens <dlstevens@us.ibm.com>
Date: Thu, 17 Aug 2006 16:27:39 -0700
Subject: [MCAST]: Fix filter leak on device removal.

This fixes source filter leakage when a device is removed and a
process leaves the group thereafter.

This also includes corresponding fixes for IPv6 multicast source
filters on device removal.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c  | 32 +++++++++++++++++++-------------
 net/ipv6/mcast.c | 10 ++++++----
 2 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index e981369..8e8117c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1793,29 +1793,35 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	struct in_device *in_dev;
 	u32 group = imr->imr_multiaddr.s_addr;
 	u32 ifindex;
+	int ret = -EADDRNOTAVAIL;
 
 	rtnl_lock();
 	in_dev = ip_mc_find_dev(imr);
-	if (!in_dev) {
-		rtnl_unlock();
-		return -ENODEV;
-	}
 	ifindex = imr->imr_ifindex;
 	for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
-		if (iml->multi.imr_multiaddr.s_addr == group &&
-		    iml->multi.imr_ifindex == ifindex) {
-			(void) ip_mc_leave_src(sk, iml, in_dev);
+		if (iml->multi.imr_multiaddr.s_addr != group)
+			continue;
+		if (ifindex) {
+			if (iml->multi.imr_ifindex != ifindex)
+				continue;
+		} else if (imr->imr_address.s_addr && imr->imr_address.s_addr !=
+				iml->multi.imr_address.s_addr)
+			continue;
 
-			*imlp = iml->next;
+		(void) ip_mc_leave_src(sk, iml, in_dev);
 
+		*imlp = iml->next;
+
+		if (in_dev)
 			ip_mc_dec_group(in_dev, group);
-			rtnl_unlock();
-			sock_kfree_s(sk, iml, sizeof(*iml));
-			return 0;
-		}
+		rtnl_unlock();
+		sock_kfree_s(sk, iml, sizeof(*iml));
+		return 0;
 	}
+	if (!in_dev)
+		ret = -ENODEV;
 	rtnl_unlock();
-	return -EADDRNOTAVAIL;
+	return ret;
 }
 
 int ip_mc_source(int add, int omode, struct sock *sk, struct
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9d697d4..639eb20 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -268,13 +268,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
 			if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
 				struct inet6_dev *idev = in6_dev_get(dev);
 
+				(void) ip6_mc_leave_src(sk, mc_lst, idev);
 				if (idev) {
-					(void) ip6_mc_leave_src(sk,mc_lst,idev);
 					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
 					in6_dev_put(idev);
 				}
 				dev_put(dev);
-			}
+			} else
+				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 			return 0;
 		}
@@ -334,13 +335,14 @@ void ipv6_sock_mc_close(struct sock *sk)
 		if (dev) {
 			struct inet6_dev *idev = in6_dev_get(dev);
 
+			(void) ip6_mc_leave_src(sk, mc_lst, idev);
 			if (idev) {
-				(void) ip6_mc_leave_src(sk, mc_lst, idev);
 				__ipv6_dev_mc_dec(idev, &mc_lst->addr);
 				in6_dev_put(idev);
 			}
 			dev_put(dev);
-		}
+		} else
+			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 
-- 
cgit v1.1


From 6e8fcbf64024f9056ba122abbb66554aa76bae5d Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Thu, 17 Aug 2006 16:44:46 -0700
Subject: [IPV4]: severe locking bug in fib_semantics.c

Found in 2.4 by Yixin Pan <yxpan@hotmail.com>.

> When I read fib_semantics.c of Linux-2.4.32, write_lock(&fib_info_lock)
> is used in fib_release_info() instead of write_lock_bh(&fib_info_lock).
> Is the following case possible: a BH interrupts fib_release_info() while
> holding the write lock, and calls ip_check_fib_default() which calls
> read_lock(&fib_info_lock), and spin forever.

Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 9be53a8..5173800 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -159,7 +159,7 @@ void free_fib_info(struct fib_info *fi)
 
 void fib_release_info(struct fib_info *fi)
 {
-	write_lock(&fib_info_lock);
+	write_lock_bh(&fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
@@ -172,7 +172,7 @@ void fib_release_info(struct fib_info *fi)
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
-	write_unlock(&fib_info_lock);
+	write_unlock_bh(&fib_info_lock);
 }
 
 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -598,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	unsigned int old_size = fib_hash_size;
 	unsigned int i, bytes;
 
-	write_lock(&fib_info_lock);
+	write_lock_bh(&fib_info_lock);
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
 	fib_hash_size = new_size;
@@ -639,7 +639,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
 	}
 	fib_info_laddrhash = new_laddrhash;
 
-	write_unlock(&fib_info_lock);
+	write_unlock_bh(&fib_info_lock);
 
 	bytes = old_size * sizeof(struct hlist_head *);
 	fib_hash_free(old_info_hash, bytes);
@@ -820,7 +820,7 @@ link_it:
 
 	fi->fib_treeref++;
 	atomic_inc(&fi->fib_clntref);
-	write_lock(&fib_info_lock);
+	write_lock_bh(&fib_info_lock);
 	hlist_add_head(&fi->fib_hash,
 		       &fib_info_hash[fib_info_hashfn(fi)]);
 	if (fi->fib_prefsrc) {
@@ -839,7 +839,7 @@ link_it:
 		head = &fib_info_devhash[hash];
 		hlist_add_head(&nh->nh_hash, head);
 	} endfor_nexthops(fi)
-	write_unlock(&fib_info_lock);
+	write_unlock_bh(&fib_info_lock);
 	return fi;
 
 err_inval:
-- 
cgit v1.1


From d205dc40798d97d63ad348bfaf7394f445d152d4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 17 Aug 2006 18:12:38 -0700
Subject: [NETFILTER]: ctnetlink: fix deadlock in table dumping

ip_conntrack_put must not be called while holding ip_conntrack_lock
since destroy_conntrack takes it again.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_netlink.c | 17 +++++++----------
 net/netfilter/nf_conntrack_netlink.c      | 17 +++++++----------
 2 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 33891bb..0d4cc92 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -415,21 +415,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 			cb->args[0], *id);
 
 	read_lock_bh(&ip_conntrack_lock);
+	last = (struct ip_conntrack *)cb->args[1];
 	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
 restart:
-		last = (struct ip_conntrack *)cb->args[1];
 		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
 			h = (struct ip_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = tuplehash_to_ctrack(h);
-			if (last != NULL) {
-				if (ct == last) {
-					ip_conntrack_put(last);
-					cb->args[1] = 0;
-					last = NULL;
-				} else
+			if (cb->args[1]) {
+				if (ct != last)
 					continue;
+				cb->args[1] = 0;
 			}
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
 		                        	cb->nlh->nlmsg_seq,
@@ -440,17 +437,17 @@ restart:
 				goto out;
 			}
 		}
-		if (last != NULL) {
-			ip_conntrack_put(last);
+		if (cb->args[1]) {
 			cb->args[1] = 0;
 			goto restart;
 		}
 	}
 out:
 	read_unlock_bh(&ip_conntrack_lock);
+	if (last)
+		ip_conntrack_put(last);
 
 	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
 	return skb->len;
 }
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index af48459..6527d4e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -429,9 +429,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 			cb->args[0], *id);
 
 	read_lock_bh(&nf_conntrack_lock);
+	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
-		last = (struct nf_conn *)cb->args[1];
 		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
 			h = (struct nf_conntrack_tuple_hash *) i;
 			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -442,13 +442,10 @@ restart:
 			 * then dump everything. */
 			if (l3proto && L3PROTO(ct) != l3proto)
 				continue;
-			if (last != NULL) {
-				if (ct == last) {
-					nf_ct_put(last);
-					cb->args[1] = 0;
-					last = NULL;
-				} else
+			if (cb->args[1]) {
+				if (ct != last)
 					continue;
+				cb->args[1] = 0;
 			}
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
 		                        	cb->nlh->nlmsg_seq,
@@ -459,17 +456,17 @@ restart:
 				goto out;
 			}
 		}
-		if (last != NULL) {
-			nf_ct_put(last);
+		if (cb->args[1]) {
 			cb->args[1] = 0;
 			goto restart;
 		}
 	}
 out:
 	read_unlock_bh(&nf_conntrack_lock);
+	if (last)
+		nf_ct_put(last);
 
 	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
 	return skb->len;
 }
 
-- 
cgit v1.1


From 8311731afc439f508ab4d759edadedae75afb73e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 17 Aug 2006 18:13:53 -0700
Subject: [NETFILTER]: ip_tables: fix table locking in ipt_do_table

table->private might change because of ruleset changes, don't use it without
holding the lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_tables.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f316ff5..048514f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -230,7 +230,7 @@ ipt_do_table(struct sk_buff **pskb,
 	const char *indev, *outdev;
 	void *table_base;
 	struct ipt_entry *e, *back;
-	struct xt_table_info *private = table->private;
+	struct xt_table_info *private;
 
 	/* Initialization */
 	ip = (*pskb)->nh.iph;
@@ -247,6 +247,7 @@ ipt_do_table(struct sk_buff **pskb,
 
 	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+	private = table->private;
 	table_base = (void *)private->entries[smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-- 
cgit v1.1


From 78eb887733ec8ff5d6e6c69e3c32a187a9303622 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 17 Aug 2006 18:22:32 -0700
Subject: [BRIDGE]: Disable SG/GSO if TX checksum is off

When the bridge recomputes features, it does not maintain the
constraint that SG/GSO must be off if TX checksum is off.
This patch adds that constraint.

On a completely unrelated note, I've also added TSO6 and TSO_ECN
feature bits if GSO is enabled on the underlying device through
the new NETIF_F_GSO_SOFTWARE macro.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f55ef68..b1211d534 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -386,12 +386,17 @@ void br_features_recompute(struct net_bridge *br)
 			checksum = 0;
 
 		if (feature & NETIF_F_GSO)
-			feature |= NETIF_F_TSO;
+			feature |= NETIF_F_GSO_SOFTWARE;
 		feature |= NETIF_F_GSO;
 
 		features &= feature;
 	}
 
+	if (!(checksum & NETIF_F_ALL_CSUM))
+		features &= ~NETIF_F_SG;
+	if (!(features & NETIF_F_SG))
+		features &= ~NETIF_F_GSO_MASK;
+
 	br->dev->features = features | checksum | NETIF_F_LLTX |
 			    NETIF_F_GSO_ROBUST;
 }
-- 
cgit v1.1


From c164a9ba0a8870c5c9d353f63085319931d69f23 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Tue, 22 Aug 2006 11:50:39 -0700
Subject: Fix sctp privilege elevation (CVE-2006-3745)

sctp_make_abort_user() now takes the msg_len along with the msg
so that we don't have to recalculate the bytes in iovec.
It also uses memcpy_fromiovec() so that we don't go beyond the
length allocated.

It is good to have this fix even if verify_iovec() is fixed to
return error on overflow.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 net/sctp/sm_make_chunk.c | 30 +++++++++---------------------
 net/sctp/sm_statefuns.c  | 20 ++++----------------
 net/sctp/socket.c        | 10 +++++++++-
 3 files changed, 22 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4f11f58..17b5092 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -806,38 +806,26 @@ no_mem:
 
 /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error.  */
 struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
-				   const struct sctp_chunk *chunk,
-				   const struct msghdr *msg)
+					const struct msghdr *msg,
+					size_t paylen)
 {
 	struct sctp_chunk *retval;
-	void *payload = NULL, *payoff;
-	size_t paylen = 0;
-	struct iovec *iov = NULL;
-	int iovlen = 0;
-
-	if (msg) {
-		iov = msg->msg_iov;
-		iovlen = msg->msg_iovlen;
-		paylen = get_user_iov_size(iov, iovlen);
-	}
+	void *payload = NULL;
+	int err;
 
-	retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen);
+	retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
 	if (!retval)
 		goto err_chunk;
 
 	if (paylen) {
 		/* Put the msg_iov together into payload.  */
-		payload = kmalloc(paylen, GFP_ATOMIC);
+		payload = kmalloc(paylen, GFP_KERNEL);
 		if (!payload)
 			goto err_payload;
-		payoff = payload;
 
-		for (; iovlen > 0; --iovlen) {
-			if (copy_from_user(payoff, iov->iov_base,iov->iov_len))
-				goto err_copy;
-			payoff += iov->iov_len;
-			iov++;
-		}
+		err = memcpy_fromiovec(payload, msg->msg_iov, paylen);
+		if (err < 0)
+			goto err_copy;
 	}
 
 	sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index ead3f1b..5b5ae79 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4031,18 +4031,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	 * from its upper layer, but retransmits data to the far end
 	 * if necessary to fill gaps.
 	 */
-	struct msghdr *msg = arg;
-	struct sctp_chunk *abort;
+	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
 
 	retval = SCTP_DISPOSITION_CONSUME;
 
-	/* Generate ABORT chunk to send the peer.  */
-	abort = sctp_make_abort_user(asoc, NULL, msg);
-	if (!abort)
-		retval = SCTP_DISPOSITION_NOMEM;
-	else
-		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
 
 	/* Even if we can't send the ABORT due to low memory delete the
 	 * TCB.  This is a departure from our typical NOMEM handling.
@@ -4166,8 +4160,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
-	struct msghdr *msg = arg;
-	struct sctp_chunk *abort;
+	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
 
 	/* Stop T1-init timer */
@@ -4175,12 +4168,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
 	retval = SCTP_DISPOSITION_CONSUME;
 
-	/* Generate ABORT chunk to send the peer */
-	abort = sctp_make_abort_user(asoc, NULL, msg);
-	if (!abort)
-		retval = SCTP_DISPOSITION_NOMEM;
-	else
-		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 54722e6..fde3f55 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1520,8 +1520,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			goto out_unlock;
 		}
 		if (sinfo_flags & SCTP_ABORT) {
+			struct sctp_chunk *chunk;
+
+			chunk = sctp_make_abort_user(asoc, msg, msg_len);
+			if (!chunk) {
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
 			SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
-			sctp_primitive_ABORT(asoc, msg);
+			sctp_primitive_ABORT(asoc, chunk);
 			err = 0;
 			goto out_unlock;
 		}
-- 
cgit v1.1


From e0b7cde9975e17a61b4511c7822803dfb7210011 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 21 Aug 2006 15:31:08 -0700
Subject: [NETFILTER]: arp_tables: fix table locking in arpt_do_table

table->private might change because of ruleset changes, don't use it
without holding the lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index df4854c..8d1d7a6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -236,7 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
-	struct xt_table_info *private = table->private;
+	struct xt_table_info *private;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
 	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -248,6 +248,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	outdev = out ? out->name : nulldevname;
 
 	read_lock_bh(&table->lock);
+	private = table->private;
 	table_base = (void *)private->entries[smp_processor_id()];
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
-- 
cgit v1.1


From 316c1592bea94ead75301cb764523661fbbcc1ca Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 22 Aug 2006 00:06:11 -0700
Subject: [TCP]: Limit window scaling if window is clamped.

This small change allows for easy per-route workarounds for broken hosts or
middleboxes that are not compliant with TCP standards for window scaling.
Rather than having to turn off window scaling globally, this patch allows
reducing or disabling window scaling if a window clamp is present.

Example: Mark Lord reported a problem with 2.6.17 kernel being unable to
access http://www.everymac.com

# ip route add 216.145.246.23/32 via 10.8.0.1 window 65535

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 507adef..b4f3ffe 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		 * See RFC1323 for an explanation of the limit to 14 
 		 */
 		space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+		space = min_t(u32, space, *window_clamp);
 		while (space > 65535 && (*rcv_wscale) < 14) {
 			space >>= 1;
 			(*rcv_wscale)++;
-- 
cgit v1.1


From 5d67476fff2df6ff12f60b540fd0e74cf2a668f9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 31 Jul 2006 14:11:48 -0700
Subject: SUNRPC: make rpc_unlink() take a dentry argument instead of a path

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 88bf6d811b01a4be7fd507d18bf5f1c527989089 commit)
---
 net/sunrpc/auth_gss/auth_gss.c |  2 +-
 net/sunrpc/rpc_pipe.c          | 20 ++++++--------------
 2 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4a9aa93..beaa7b8 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -718,7 +718,7 @@ gss_destroy(struct rpc_auth *auth)
 		auth, auth->au_flavor);
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
-	rpc_unlink(gss_auth->path);
+	rpc_unlink(gss_auth->dentry);
 	dput(gss_auth->dentry);
 	gss_auth->dentry = NULL;
 	gss_mech_put(gss_auth->mech);
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a3bd2db..9144f27 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -746,22 +746,15 @@ err_dput:
 }
 
 int
-rpc_unlink(char *path)
+rpc_unlink(struct dentry *dentry)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
+	struct dentry *parent;
 	struct inode *dir;
-	int error;
+	int error = 0;
 
-	if ((error = rpc_lookup_parent(path, &nd)) != 0)
-		return error;
-	dir = nd.dentry->d_inode;
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_release;
-	}
 	d_drop(dentry);
 	if (dentry->d_inode) {
 		rpc_close_pipes(dentry->d_inode);
@@ -769,9 +762,8 @@ rpc_unlink(char *path)
 	}
 	dput(dentry);
 	inode_dir_notify(dir, DN_DELETE);
-out_release:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(&nd);
+	dput(parent);
 	return error;
 }
 
-- 
cgit v1.1


From dff02cc1a34fcb60904a2c57cb351857cc11219e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 31 Jul 2006 14:17:18 -0700
Subject: NFS: clean up rpc_rmdir

Make it take a dentry argument instead of a path

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 648d4116eb2509f010f7f34704a650150309b3e7 commit)
---
 net/sunrpc/clnt.c     |  6 +++---
 net/sunrpc/rpc_pipe.c | 18 +++++-------------
 2 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d6409e7..d307556 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -183,7 +183,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 
 out_no_auth:
 	if (!IS_ERR(clnt->cl_dentry)) {
-		rpc_rmdir(clnt->cl_pathname);
+		rpc_rmdir(clnt->cl_dentry);
 		dput(clnt->cl_dentry);
 		rpc_put_mount();
 	}
@@ -320,8 +320,8 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		rpc_destroy_client(clnt->cl_parent);
 		goto out_free;
 	}
-	if (clnt->cl_pathname[0])
-		rpc_rmdir(clnt->cl_pathname);
+	if (!IS_ERR(clnt->cl_dentry))
+		rpc_rmdir(clnt->cl_dentry);
 	if (clnt->cl_xprt) {
 		xprt_destroy(clnt->cl_xprt);
 		clnt->cl_xprt = NULL;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9144f27..9c355e1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -684,28 +684,20 @@ err_dput:
 }
 
 int
-rpc_rmdir(char *path)
+rpc_rmdir(struct dentry *dentry)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
+	struct dentry *parent;
 	struct inode *dir;
 	int error;
 
-	if ((error = rpc_lookup_parent(path, &nd)) != 0)
-		return error;
-	dir = nd.dentry->d_inode;
+	parent = dget_parent(dentry);
+	dir = parent->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_release;
-	}
 	rpc_depopulate(dentry);
 	error = __rpc_rmdir(dir, dentry);
 	dput(dentry);
-out_release:
 	mutex_unlock(&dir->i_mutex);
-	rpc_release_path(&nd);
+	dput(parent);
 	return error;
 }
 
-- 
cgit v1.1


From 68adb0af51ebccb72ffb14d49cb8121b1afc4259 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 10 Aug 2006 17:51:46 -0400
Subject: SUNRPC: rpc_unlink() must check for unhashed dentries

A prior call to rpc_depopulate() by rpc_rmdir() on the parent directory may
have already called simple_unlink() on this entry.
Add the same check to rpc_rmdir(). Also remove a redundant call to
rpc_close_pipes() in rpc_rmdir.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 0bbfb9d20f6437c4031aa3bf9b4d311a053e58e3 commit)
---
 net/sunrpc/rpc_pipe.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9c355e1..0b1a1ac 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -539,6 +539,7 @@ repeat:
 				rpc_close_pipes(dentry->d_inode);
 				simple_unlink(dir, dentry);
 			}
+			inode_dir_notify(dir, DN_DELETE);
 			dput(dentry);
 		} while (n);
 		goto repeat;
@@ -610,8 +611,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
 	int error;
 
 	shrink_dcache_parent(dentry);
-	if (dentry->d_inode)
-		rpc_close_pipes(dentry->d_inode);
+	if (d_unhashed(dentry))
+		return 0;
 	if ((error = simple_rmdir(dir, dentry)) != 0)
 		return error;
 	if (!error) {
@@ -747,13 +748,15 @@ rpc_unlink(struct dentry *dentry)
 	parent = dget_parent(dentry);
 	dir = parent->d_inode;
 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-	d_drop(dentry);
-	if (dentry->d_inode) {
-		rpc_close_pipes(dentry->d_inode);
-		error = simple_unlink(dir, dentry);
+	if (!d_unhashed(dentry)) {
+		d_drop(dentry);
+		if (dentry->d_inode) {
+			rpc_close_pipes(dentry->d_inode);
+			error = simple_unlink(dir, dentry);
+		}
+		inode_dir_notify(dir, DN_DELETE);
 	}
 	dput(dentry);
-	inode_dir_notify(dir, DN_DELETE);
 	mutex_unlock(&dir->i_mutex);
 	dput(parent);
 	return error;
-- 
cgit v1.1


From 8f8e7a50f450fcb86a5b2ffb94543c57a14f8260 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 14 Aug 2006 13:11:15 -0400
Subject: SUNRPC: Fix dentry refcounting issues with users of rpc_pipefs

rpc_unlink() and rpc_rmdir() will dput the dentry reference for you.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from a05a57effa71a1f67ccbfc52335c10c8b85f3f6a commit)
---
 net/sunrpc/auth_gss/auth_gss.c |  1 -
 net/sunrpc/clnt.c              | 15 ++++++---------
 2 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index beaa7b8..ef1cf5b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -719,7 +719,6 @@ gss_destroy(struct rpc_auth *auth)
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
 	rpc_unlink(gss_auth->dentry);
-	dput(gss_auth->dentry);
 	gss_auth->dentry = NULL;
 	gss_mech_put(gss_auth->mech);
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d307556..d9eac70 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -184,7 +184,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 out_no_auth:
 	if (!IS_ERR(clnt->cl_dentry)) {
 		rpc_rmdir(clnt->cl_dentry);
-		dput(clnt->cl_dentry);
 		rpc_put_mount();
 	}
 out_no_path:
@@ -251,10 +250,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
 	new->cl_dead = 0;
-	if (!IS_ERR(new->cl_dentry)) {
+	if (!IS_ERR(new->cl_dentry))
 		dget(new->cl_dentry);
-		rpc_get_mount();
-	}
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
@@ -317,11 +314,15 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		clnt->cl_auth = NULL;
 	}
 	if (clnt->cl_parent != clnt) {
+		if (!IS_ERR(clnt->cl_dentry))
+			dput(clnt->cl_dentry);
 		rpc_destroy_client(clnt->cl_parent);
 		goto out_free;
 	}
-	if (!IS_ERR(clnt->cl_dentry))
+	if (!IS_ERR(clnt->cl_dentry)) {
 		rpc_rmdir(clnt->cl_dentry);
+		rpc_put_mount();
+	}
 	if (clnt->cl_xprt) {
 		xprt_destroy(clnt->cl_xprt);
 		clnt->cl_xprt = NULL;
@@ -331,10 +332,6 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 out_free:
 	rpc_free_iostats(clnt->cl_metrics);
 	clnt->cl_metrics = NULL;
-	if (!IS_ERR(clnt->cl_dentry)) {
-		dput(clnt->cl_dentry);
-		rpc_put_mount();
-	}
 	kfree(clnt);
 	return 0;
 }
-- 
cgit v1.1


From e8896495bca8490a427409e0886d63d05419ec65 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 24 Aug 2006 15:44:19 -0400
Subject: NFS: Check lengths more thoroughly in NFS4 readdir XDR decode

Check the bounds of length specifiers more thoroughly in the XDR decoding of
NFS4 readdir reply data.

Currently, if the server returns a bitmap or attr length that causes the
current decode point pointer to wrap, this could go undetected (consider a
small "negative" length on a 32-bit machine).

Also add a check into the main XDR decode handler to make sure that the amount
of data is a multiple of four bytes (as specified by RFC-1014).  This makes
sure that we can do u32* pointer subtraction in the NFS client without risking
an undefined result (the result is undefined if the pointers are not correctly
aligned with respect to one another).

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
(cherry picked from 5861fddd64a7eaf7e8b1a9997455a24e7f688092 commit)
---
 net/sunrpc/clnt.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d9eac70..3e19d32 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1181,6 +1181,17 @@ call_verify(struct rpc_task *task)
 	u32	*p = iov->iov_base, n;
 	int error = -EACCES;
 
+	if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
+		/* RFC-1014 says that the representation of XDR data must be a
+		 * multiple of four bytes
+		 * - if it isn't pointer subtraction in the NFS client may give
+		 *   undefined results
+		 */
+		printk(KERN_WARNING
+		       "call_verify: XDR representation not a multiple of"
+		       " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len);
+		goto out_eio;
+	}
 	if ((len -= 3) < 0)
 		goto out_overflow;
 	p += 1;	/* skip XID */
-- 
cgit v1.1


From 59eed279c5daa88d95e429782ddb8ef87e52c44b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 25 Aug 2006 15:55:43 -0700
Subject: [IPV6]: Segmentation offload not set correctly on TCP children

TCP over IPV6 would incorrectly inherit the GSO settings.
This would cause the kernel to send TCP Segmentation Offload packets for
IPV6 data to devices that can't handle it. It caused the sky2 driver
to lock up (http://bugzilla.kernel.org/show_bug.cgi?id=7050)
and the e1000 to generate bogus packets. I can't blame the
hardware for gagging if the upper layers feed it garbage.

This was a new bug in 2.6.18 introduced with GSO support.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b843a65..802a1a6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -944,7 +944,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	 * comment in that function for the gory details. -acme
 	 */
 
-	sk->sk_gso_type = SKB_GSO_TCPV6;
+	newsk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(newsk, dst, NULL);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
-- 
cgit v1.1


From f3166c07175c1639687288006aeabed363a921f3 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:01:03 -0700
Subject: [DCCP]: Fix typo

This fixes a small typo in net/dccp/ccids/lib/packet_history.c

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index ad98d6a..6739be1 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,5 +1,5 @@
 /*
- *  net/dccp/packet_history.h
+ *  net/dccp/packet_history.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
  *
-- 
cgit v1.1


From e6bccd357343e98db9e1fd0d487f4f924e1a7921 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:01:30 -0700
Subject: [DCCP]: Update contact details and copyright

Just updating copyright and contacts

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c              | 4 ++--
 net/dccp/ccids/ccid3.h              | 4 ++--
 net/dccp/ccids/lib/loss_interval.c  | 2 +-
 net/dccp/ccids/lib/loss_interval.h  | 2 +-
 net/dccp/ccids/lib/packet_history.c | 6 +++---
 net/dccp/ccids/lib/packet_history.h | 4 ++--
 net/dccp/ccids/lib/tfrc.h           | 2 +-
 net/dccp/ccids/lib/tfrc_equation.c  | 2 +-
 net/dccp/dccp.h                     | 2 +-
 net/dccp/options.c                  | 2 +-
 10 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c39bff7..0f85970 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -2,7 +2,7 @@
  *  net/dccp/ccids/ccid3.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005-6 Ian McDonald <imcdnzl@gmail.com>
+ *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
  *  An implementation of the DCCP protocol
  *
@@ -1230,7 +1230,7 @@ static __exit void ccid3_module_exit(void)
 }
 module_exit(ccid3_module_exit);
 
-MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
 	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
 MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
 MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 5ade4f6..22cb9f8 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,13 +1,13 @@
 /*
  *  net/dccp/ccids/ccid3.h
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
  *  An implementation of the DCCP protocol
  *
  *  This code has been developed by the University of Waikato WAND
  *  research group. For further information please see http://www.wand.net.nz/
- *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
  *
  *  This code also uses code from Lulea University, rereleased as GPL by its
  *  authors:
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 5d7b7d8..b93d9fc 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -2,7 +2,7 @@
  *  net/dccp/ccids/lib/loss_interval.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  This program is free software; you can redistribute it and/or modify
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index 43bf782..dcb370a 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -4,7 +4,7 @@
  *  net/dccp/ccids/lib/loss_interval.h
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *
  *  This program is free software; you can redistribute it and/or modify it
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 6739be1..7b6b03e 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -1,13 +1,13 @@
 /*
  *  net/dccp/packet_history.c
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
  *  An implementation of the DCCP protocol
  *
  *  This code has been developed by the University of Waikato WAND
  *  research group. For further information please see http://www.wand.net.nz/
- *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
  *
  *  This code also uses code from Lulea University, rereleased as GPL by its
  *  authors:
@@ -391,7 +391,7 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list)
 
 EXPORT_SYMBOL_GPL(dccp_tx_hist_purge);
 
-MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz>, "
+MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, "
 	      "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
 MODULE_DESCRIPTION("DCCP TFRC library");
 MODULE_LICENSE("GPL");
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 673c209..27c4309 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -1,13 +1,13 @@
 /*
  *  net/dccp/packet_history.h
  *
- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *  Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
  *
  *  An implementation of the DCCP protocol
  *
  *  This code has been developed by the University of Waikato WAND
  *  research group. For further information please see http://www.wand.net.nz/
- *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
  *
  *  This code also uses code from Lulea University, rereleased as GPL by its
  *  authors:
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 130c4c4..45f30f5 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -4,7 +4,7 @@
  *  net/dccp/ccids/lib/tfrc.h
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
  *
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 4fd2ebe..44076e0 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -2,7 +2,7 @@
  *  net/dccp/ccids/lib/tfrc_equation.c
  *
  *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
  *
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index d00a2f4..b8931d3 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -5,7 +5,7 @@
  *
  *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
  *	This program is free software; you can redistribute it and/or modify it
  *	under the terms of the GNU General Public License version 2 as
diff --git a/net/dccp/options.c b/net/dccp/options.c
index daf72bb..07a3469 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -4,7 +4,7 @@
  *  An implementation of the DCCP protocol
  *  Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- *  Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
+ *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
  *
  *      This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
-- 
cgit v1.1


From 837d107cd101fbf734e9ea2bbb5c7336a329e432 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:06:42 -0700
Subject: [DCCP]: Introduces follows48 function

This adds a new function to see if two sequence numbers follow each
other.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index b8931d3..a5c5475 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -81,6 +81,14 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
 	return after48(seq1, seq2) ? seq1 : seq2;
 }
 
+/* is seq1 next seqno after seq2 */
+static inline int follows48(const u64 seq1, const u64 seq2)
+{
+	int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF);
+
+	return diff==1;
+}
+
 enum {
 	DCCP_MIB_NUM = 0,
 	DCCP_MIB_ACTIVEOPENS,			/* ActiveOpens */
-- 
cgit v1.1


From 80193aee18bc862e284ba18504f3a3e14706a997 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 19:07:36 -0700
Subject: [DCCP]: Introduce dccp_rx_hist_find_entry

This adds a new function dccp_rx_hist_find_entry.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/packet_history.c | 19 +++++++++++++++++++
 net/dccp/ccids/lib/packet_history.h |  2 ++
 2 files changed, 21 insertions(+)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 7b6b03e..420c60f 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -365,6 +365,25 @@ struct dccp_tx_hist_entry *
 
 EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry);
 
+int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+   u8 *ccval)
+{
+	struct dccp_rx_hist_entry *packet = NULL, *entry;
+
+	list_for_each_entry(entry, list, dccphrx_node)
+		if (entry->dccphrx_seqno == seq) {
+			packet = entry;
+			break;
+		}
+
+	if (packet)
+		*ccval = packet->dccphrx_ccval;
+
+	return packet != NULL;
+}
+
+EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry);
+
 void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist,
 			      struct list_head *list,
 			      struct dccp_tx_hist_entry *packet)
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 27c4309..aea9c5d 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -106,6 +106,8 @@ static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist,
 extern struct dccp_tx_hist_entry *
 			dccp_tx_hist_find_entry(const struct list_head *list,
 						const u64 seq);
+extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq,
+   u8 *ccval);
 
 static inline void dccp_tx_hist_add_entry(struct list_head *list,
 					  struct dccp_tx_hist_entry *entry)
-- 
cgit v1.1


From 3a13813e6effcfad5910d47b15b724621b50b878 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Sat, 26 Aug 2006 20:28:30 -0700
Subject: [BRIDGE] netfilter: memory corruption fix

The bridge-netfilter code will overwrite memory if there is not enough
headroom in the skb to save the header.  This first showed up when
using Xen with the sky2 driver, which doesn't allocate the extra space.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6ccd32b..864fbbc 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -40,11 +40,15 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 	else {
 #ifdef CONFIG_BRIDGE_NETFILTER
 		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
-		nf_bridge_maybe_copy_header(skb);
+		if (nf_bridge_maybe_copy_header(skb))
+			kfree_skb(skb);
+		else
 #endif
-		skb_push(skb, ETH_HLEN);
+		{
+			skb_push(skb, ETH_HLEN);
 
-		dev_queue_xmit(skb);
+			dev_queue_xmit(skb);
+		}
 	}
 
 	return 0;
-- 
cgit v1.1


From 66a377c5041e1e399633153c8b500d457281e7c1 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Sat, 26 Aug 2006 23:40:50 -0700
Subject: [DCCP]: Fix CCID3

This fixes CCID3 to give much closer performance to RFC4342.

CCID3 is meant to alter sending rate based on RTT and loss.

The performance was verified against:
http://wand.net.nz/~perry/max_download.php

For example I tested with netem and had the following parameters:
Delayed Acks 1, MSS 256 bytes, RTT 105 ms, packet loss 5%.

This gives a theoretical speed of 71.9 Kbits/s. I measured across three
runs with this patch set and got 70.1 Kbits/s. Without this patchset the
average was 232 Kbits/s which means Linux can't be used for CCID3 research
properly.

I also tested with netem turned off, so the box was just acting as a router
with a 1.2 msec RTT. The performance in this case is the same with or without
the patch, at around 30 Mbit/s.

Signed off by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c              | 149 ++++++++++++++++++++++++++++--------
 net/dccp/ccids/ccid3.h              |   5 +-
 net/dccp/ccids/lib/loss_interval.c  |  34 ++++----
 net/dccp/ccids/lib/loss_interval.h  |   7 +-
 net/dccp/ccids/lib/packet_history.c | 141 +++-------------------------------
 net/dccp/ccids/lib/packet_history.h |  11 +--
 6 files changed, 154 insertions(+), 193 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0f85970..090bc39 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -342,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
 		new_packet->dccphtx_ccval =
 			DCCP_SKB_CB(skb)->dccpd_ccval =
 				hctx->ccid3hctx_last_win_count;
+		timeval_add_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
 	}
 out:
 	return rc;
@@ -413,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
 		if (len > 0) {
-			hctx->ccid3hctx_t_nom = now;
+			timeval_sub_usecs(&hctx->ccid3hctx_t_nom,
+				  hctx->ccid3hctx_t_ipi);
 			ccid3_calc_new_t_ipi(hctx);
 			ccid3_calc_new_delta(hctx);
 			timeval_add_usecs(&hctx->ccid3hctx_t_nom,
@@ -757,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
 	}
 
 	hcrx->ccid3hcrx_tstamp_last_feedback = now;
-	hcrx->ccid3hcrx_last_counter	     = packet->dccphrx_ccval;
-	hcrx->ccid3hcrx_seqno_last_counter   = packet->dccphrx_seqno;
+	hcrx->ccid3hcrx_ccval_last_counter   = packet->dccphrx_ccval;
 	hcrx->ccid3hcrx_bytes_recv	     = 0;
 
 	/* Convert to multiples of 10us */
@@ -782,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
 		return 0;
 
-	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
+	DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter;
 
 	if (dccp_packet_without_ack(skb))
 		return 0;
@@ -854,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
 		interval = 1;
 	}
 found:
+	if (!tail) {
+		LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n",
+		   __FUNCTION__);
+		return ~0;
+	}
 	rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval;
 	ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
 		       dccp_role(sk), sk, rtt);
@@ -864,9 +871,20 @@ found:
 	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
 	x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
 
+	if (x_recv == 0)
+		x_recv = hcrx->ccid3hcrx_x_recv;
+
 	tmp1 = (u64)x_recv * (u64)rtt;
 	do_div(tmp1,10000000);
 	tmp2 = (u32)tmp1;
+
+	if (!tmp2) {
+		LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 "
+		   "%s: x_recv = %u, rtt =%u\n",
+		   __FUNCTION__, x_recv, rtt);
+		return ~0;
+	}
+
 	fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
 	/* do not alter order above or you will get overflow on 32 bit */
 	p = tfrc_calc_x_reverse_lookup(fval);
@@ -882,31 +900,101 @@ found:
 static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+	struct dccp_li_hist_entry *next, *head;
+	u64 seq_temp;
 
-	if (seq_loss != DCCP_MAX_SEQNO + 1 &&
-	    list_empty(&hcrx->ccid3hcrx_li_hist)) {
-		struct dccp_li_hist_entry *li_tail;
+	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
+		if (!dccp_li_hist_interval_new(ccid3_li_hist,
+		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
+			return;
 
-		li_tail = dccp_li_hist_interval_new(ccid3_li_hist,
-						    &hcrx->ccid3hcrx_li_hist,
-						    seq_loss, win_loss);
-		if (li_tail == NULL)
+		next = (struct dccp_li_hist_entry *)
+		   hcrx->ccid3hcrx_li_hist.next;
+		next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+	} else {
+		struct dccp_li_hist_entry *entry;
+		struct list_head *tail;
+
+		head = (struct dccp_li_hist_entry *)
+		   hcrx->ccid3hcrx_li_hist.next;
+		/* FIXME win count check removed as was wrong */
+		/* should make this check with receive history */
+		/* and compare there as per section 10.2 of RFC4342 */
+
+		/* new loss event detected */
+		/* calculate last interval length */
+		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
+		entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC);
+
+		if (entry == NULL) {
+			printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__);
+			dump_stack();
 			return;
-		li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
-	} else
-		    LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of "
-				   "interval\n", __FUNCTION__);
+		}
+
+		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);
+
+		tail = hcrx->ccid3hcrx_li_hist.prev;
+		list_del(tail);
+		kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
+
+		/* Create the newest interval */
+		entry->dccplih_seqno = seq_loss;
+		entry->dccplih_interval = seq_temp;
+		entry->dccplih_win_count = win_loss;
+	}
 }
 
-static void ccid3_hc_rx_detect_loss(struct sock *sk)
+static int ccid3_hc_rx_detect_loss(struct sock *sk,
+                                    struct dccp_rx_hist_entry *packet)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
-	u8 win_loss;
-	const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
-						      &hcrx->ccid3hcrx_li_hist,
-						      &win_loss);
+	struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist);
+	u64 seqno = packet->dccphrx_seqno;
+	u64 tmp_seqno;
+	int loss = 0;
+	u8 ccval;
+
+
+	tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+
+	if (!rx_hist ||
+	   follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+		hcrx->ccid3hcrx_seqno_nonloss = seqno;
+		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+		goto detect_out;
+	}
+
 
-	ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
+	   > TFRC_RECV_NUM_LATE_LOSS) {
+		loss = 1;
+		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
+		   hcrx->ccid3hcrx_ccval_nonloss);
+		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+		dccp_inc_seqno(&tmp_seqno);
+		hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+		dccp_inc_seqno(&tmp_seqno);
+		while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
+		   tmp_seqno, &ccval)) {
+		   	hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
+			hcrx->ccid3hcrx_ccval_nonloss = ccval;
+			dccp_inc_seqno(&tmp_seqno);
+		}
+	}
+
+	/* FIXME - this code could be simplified with above while */
+	/* but works at moment */
+	if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
+		hcrx->ccid3hcrx_seqno_nonloss = seqno;
+		hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval;
+	}
+
+detect_out:
+	dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
+		   &hcrx->ccid3hcrx_li_hist, packet,
+		   hcrx->ccid3hcrx_seqno_nonloss);
+	return loss;
 }
 
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -916,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_rx_hist_entry *packet;
 	struct timeval now;
 	u8 win_count;
-	u32 p_prev, r_sample, t_elapsed;
-	int ins;
+	u32 p_prev, rtt_prev, r_sample, t_elapsed;
+	int loss;
 
 	BUG_ON(hcrx == NULL ||
 	       !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
@@ -932,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	case DCCP_PKT_DATAACK:
 		if (opt_recv->dccpor_timestamp_echo == 0)
 			break;
-		p_prev = hcrx->ccid3hcrx_rtt;
+		rtt_prev = hcrx->ccid3hcrx_rtt;
 		dccp_timestamp(sk, &now);
 		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
 		r_sample = timeval_usecs(&now);
@@ -951,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
 					      r_sample / 10;
 
-		if (p_prev != hcrx->ccid3hcrx_rtt)
-			ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
+		if (rtt_prev != hcrx->ccid3hcrx_rtt)
+			ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
 				       dccp_role(sk), hcrx->ccid3hcrx_rtt,
 				       opt_recv->dccpor_elapsed_time);
 		break;
@@ -973,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 
 	win_count = packet->dccphrx_ccval;
 
-	ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist,
-				      &hcrx->ccid3hcrx_li_hist, packet);
+	loss = ccid3_hc_rx_detect_loss(sk, packet);
 
 	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
 		return;
@@ -991,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	case TFRC_RSTATE_DATA:
 		hcrx->ccid3hcrx_bytes_recv += skb->len -
 					      dccp_hdr(skb)->dccph_doff * 4;
-		if (ins != 0)
+		if (loss)
 			break;
 
 		dccp_timestamp(sk, &now);
@@ -1012,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n",
 		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
 
-	ccid3_hc_rx_detect_loss(sk);
 	p_prev = hcrx->ccid3hcrx_p;
 	
 	/* Calculate loss event rate */
@@ -1022,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		/* Scaling up by 1000000 as fixed decimal */
 		if (i_mean != 0)
 			hcrx->ccid3hcrx_p = 1000000 / i_mean;
+	} else {
+		printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__);
+		dump_stack();
 	}
 
 	if (hcrx->ccid3hcrx_p > p_prev) {
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 22cb9f8..0a2cb75 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -120,9 +120,10 @@ struct ccid3_hc_rx_sock {
 #define ccid3hcrx_x_recv	ccid3hcrx_tfrc.tfrcrx_x_recv
 #define ccid3hcrx_rtt		ccid3hcrx_tfrc.tfrcrx_rtt
 #define ccid3hcrx_p		ccid3hcrx_tfrc.tfrcrx_p
-  	u64			ccid3hcrx_seqno_last_counter:48,
+  	u64			ccid3hcrx_seqno_nonloss:48,
+				ccid3hcrx_ccval_nonloss:4,
 				ccid3hcrx_state:8,
-				ccid3hcrx_last_counter:4;
+				ccid3hcrx_ccval_last_counter:4;
   	u32			ccid3hcrx_bytes_recv;
   	struct timeval		ccid3hcrx_tstamp_last_feedback;
   	struct timeval		ccid3hcrx_tstamp_last_ack;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index b93d9fc..906c81a 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/module.h>
+#include <net/sock.h>
 
 #include "loss_interval.h"
 
@@ -90,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 	u32 w_tot  = 0;
 
 	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
-		if (i < DCCP_LI_HIST_IVAL_F_LENGTH) {
+		if (li_entry->dccplih_interval != ~0) {
 			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
 			w_tot  += dccp_li_hist_w[i];
+			if (i != 0)
+				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
 		}
 
-		if (i != 0)
-			i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
 
 		if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
 			break;
@@ -107,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 
 	i_tot = max(i_tot0, i_tot1);
 
-	/* FIXME: Why do we do this? -Ian McDonald */
-	if (i_tot * 4 < w_tot)
-		i_tot = w_tot * 4;
+	if (!w_tot) {
+		LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__);
+		return 1;
+	}
 
-	return i_tot * 4 / w_tot;
+	return i_tot / w_tot;
 }
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
 
-struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist,
-						     struct list_head *list,
-						     const u64 seq_loss,
-						     const u8 win_loss)
+int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+   struct list_head *list, const u64 seq_loss, const u8 win_loss)
 {
-	struct dccp_li_hist_entry *tail = NULL, *entry;
+	struct dccp_li_hist_entry *entry;
 	int i;
 
-	for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) {
+	for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
 		entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC);
 		if (entry == NULL) {
 			dccp_li_hist_purge(hist, list);
-			return NULL;
+			dump_stack();
+			return 0;
 		}
-		if (tail == NULL)
-			tail = entry;
+		entry->dccplih_interval = ~0;
 		list_add(&entry->dccplih_node, list);
 	}
 
 	entry->dccplih_seqno     = seq_loss;
 	entry->dccplih_win_count = win_loss;
-	return tail;
+	return 1;
 }
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index dcb370a..0ae85f0 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
 
 extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
 
-extern struct dccp_li_hist_entry *
-			dccp_li_hist_interval_new(struct dccp_li_hist *hist,
-						  struct list_head *list,
-						  const u64 seq_loss,
-						  const u8 win_loss);
+extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+   struct list_head *list, const u64 seq_loss, const u8 win_loss);
 #endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 420c60f..b876c9c 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -112,64 +112,27 @@ struct dccp_rx_hist_entry *
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet);
 
-int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
 			    struct list_head *rx_list,
 			    struct list_head *li_list,
-			    struct dccp_rx_hist_entry *packet)
+			    struct dccp_rx_hist_entry *packet,
+			    u64 nonloss_seqno)
 {
-	struct dccp_rx_hist_entry *entry, *next, *iter;
+	struct dccp_rx_hist_entry *entry, *next;
 	u8 num_later = 0;
 
-	iter = dccp_rx_hist_head(rx_list);
-	if (iter == NULL)
-		dccp_rx_hist_add_entry(rx_list, packet);
-	else {
-		const u64 seqno = packet->dccphrx_seqno;
-
-		if (after48(seqno, iter->dccphrx_seqno))
-			dccp_rx_hist_add_entry(rx_list, packet);
-		else {
-			if (dccp_rx_hist_entry_data_packet(iter))
-				num_later = 1;
-
-			list_for_each_entry_continue(iter, rx_list,
-						     dccphrx_node) {
-				if (after48(seqno, iter->dccphrx_seqno)) {
-					dccp_rx_hist_add_entry(&iter->dccphrx_node,
-							       packet);
-					goto trim_history;
-				}
-
-				if (dccp_rx_hist_entry_data_packet(iter))
-					num_later++;
-
-				if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
-					dccp_rx_hist_entry_delete(hist, packet);
-					return 1;
-				}
-			}
-
-			if (num_later < TFRC_RECV_NUM_LATE_LOSS)
-				dccp_rx_hist_add_entry(rx_list, packet);
-			/*
-			 * FIXME: else what? should we destroy the packet
-			 * like above?
-			 */
-		}
-	}
+	list_add(&packet->dccphrx_node, rx_list);
 
-trim_history:
-	/*
-	 * Trim history (remove all packets after the NUM_LATE_LOSS + 1
-	 * data packets)
-	 */
 	num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
 
 	if (!list_empty(li_list)) {
 		list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
 			if (num_later == 0) {
-				list_del_init(&entry->dccphrx_node);
-				dccp_rx_hist_entry_delete(hist, entry);
+				if (after48(nonloss_seqno,
+				   entry->dccphrx_seqno)) {
+					list_del_init(&entry->dccphrx_node);
+					dccp_rx_hist_entry_delete(hist, entry);
+				}
 			} else if (dccp_rx_hist_entry_data_packet(entry))
 				--num_later;
 		}
@@ -217,94 +180,10 @@ trim_history:
 				--num_later;
 		}
 	}
-
-	return 0;
 }
 
 EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet);
 
-u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
-			     struct list_head *li_list, u8 *win_loss)
-{
-	struct dccp_rx_hist_entry *entry, *next, *packet;
-	struct dccp_rx_hist_entry *a_loss = NULL;
-	struct dccp_rx_hist_entry *b_loss = NULL;
-	u64 seq_loss = DCCP_MAX_SEQNO + 1;
-	u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
-
-	list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) {
-		if (num_later == 0) {
-			b_loss = entry;
-			break;
-		} else if (dccp_rx_hist_entry_data_packet(entry))
-			--num_later;
-	}
-
-	if (b_loss == NULL)
-		goto out;
-
-	num_later = 1;
-	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
-		if (num_later == 0) {
-			a_loss = entry;
-			break;
-		} else if (dccp_rx_hist_entry_data_packet(entry))
-			--num_later;
-	}
-
-	if (a_loss == NULL) {
-		if (list_empty(li_list)) {
-			/* no loss event have occured yet */
-			LIMIT_NETDEBUG("%s: TODO: find a lost data packet by "
-				       "comparing to initial seqno\n",
-				       __FUNCTION__);
-			goto out;
-		} else {
-			LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!",
-				       __FUNCTION__);
-			goto out;
-		}
-	}
-
-	/* Locate a lost data packet */
-	entry = packet = b_loss;
-	list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) {
-		u64 delta = dccp_delta_seqno(entry->dccphrx_seqno,
-					     packet->dccphrx_seqno);
-
-		if (delta != 0) {
-			if (dccp_rx_hist_entry_data_packet(packet))
-				--delta;
-			/*
-			 * FIXME: check this, probably this % usage is because
-			 * in earlier drafts the ndp count was just 8 bits
-			 * long, but now it cam be up to 24 bits long.
-			 */
-#if 0
-			if (delta % DCCP_NDP_LIMIT !=
-			    (packet->dccphrx_ndp -
-			     entry->dccphrx_ndp) % DCCP_NDP_LIMIT)
-#endif
-			if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) {
-				seq_loss = entry->dccphrx_seqno;
-				dccp_inc_seqno(&seq_loss);
-			}
-		}
-		packet = entry;
-		if (packet == a_loss)
-			break;
-	}
-out:
-	if (seq_loss != DCCP_MAX_SEQNO + 1)
-		*win_loss = a_loss->dccphrx_ccval;
-	else
-		*win_loss = 0; /* Paranoia */
-
-	return seq_loss;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss);
-
 struct dccp_tx_hist *dccp_tx_hist_new(const char *name)
 {
 	struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index aea9c5d..067cf1c 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -166,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist,
 extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist,
 			       struct list_head *list);
 
-static inline void dccp_rx_hist_add_entry(struct list_head *list,
-					  struct dccp_rx_hist_entry *entry)
-{
-	list_add(&entry->dccphrx_node, list);
-}
-
 static inline struct dccp_rx_hist_entry *
 		dccp_rx_hist_head(struct list_head *list)
 {
@@ -190,10 +184,11 @@ static inline int
 	       entry->dccphrx_type == DCCP_PKT_DATAACK;
 }
 
-extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
+extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist,
 				   struct list_head *rx_list,
 				   struct list_head *li_list,
-				   struct dccp_rx_hist_entry *packet);
+				   struct dccp_rx_hist_entry *packet,
+				   u64 nonloss_seqno);
 
 extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list,
 				    struct list_head *li_list, u8 *win_loss);
-- 
cgit v1.1