From 7947b20ebae785ba25154aa1a9a00a98a22de75a Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 4 Jun 2007 21:17:10 -0700
Subject: [BNX2]: Fix netdev watchdog on 5708.

There's a bug in the driver that only initializes half of the context
memory on the 5708.  Surprisingly, this works most of the time except
for some occasional netdev watchdogs when sending a lot of 64-byte
packets.  The fix is to add the missing code to initialize the 2nd
halves of all context memory.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index da7c3b0..9789f05 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1811,6 +1811,7 @@ bnx2_init_context(struct bnx2 *bp)
 	vcid = 96;
 	while (vcid) {
 		u32 vcid_addr, pcid_addr, offset;
+		int i;
 
 		vcid--;
 
@@ -1831,16 +1832,20 @@ bnx2_init_context(struct bnx2 *bp)
 			pcid_addr = vcid_addr;
 		}
 
-		REG_WR(bp, BNX2_CTX_VIRT_ADDR, 0x00);
-		REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr);
+		for (i = 0; i < (CTX_SIZE / PHY_CTX_SIZE); i++) {
+			vcid_addr += (i << PHY_CTX_SHIFT);
+			pcid_addr += (i << PHY_CTX_SHIFT);
 
-		/* Zero out the context. */
-		for (offset = 0; offset < PHY_CTX_SIZE; offset += 4) {
-			CTX_WR(bp, 0x00, offset, 0);
-		}
+			REG_WR(bp, BNX2_CTX_VIRT_ADDR, 0x00);
+			REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr);
 
-		REG_WR(bp, BNX2_CTX_VIRT_ADDR, vcid_addr);
-		REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr);
+			/* Zero out the context. */
+			for (offset = 0; offset < PHY_CTX_SIZE; offset += 4)
+				CTX_WR(bp, 0x00, offset, 0);
+
+			REG_WR(bp, BNX2_CTX_VIRT_ADDR, vcid_addr);
+			REG_WR(bp, BNX2_CTX_PAGE_TBL, pcid_addr);
+		}
 	}
 }
 
-- 
cgit v1.1


From 641bdcd56c8bb2110a31da846b2752b11a644050 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 4 Jun 2007 21:22:24 -0700
Subject: [BNX2]: Add missing wait in bnx2_init_5709_context().

For correctness, we need to wait for the MEM_INIT bit to be cleared
in the BNX2_CTX_COMMAND register before proceeding.

[Added return -EBUSY when the MEM_INIT bit doesn't clear, suggested
by Jeff Garzik.]

Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 9789f05..9eba7a2 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1778,6 +1778,15 @@ bnx2_init_5709_context(struct bnx2 *bp)
 	val = BNX2_CTX_COMMAND_ENABLED | BNX2_CTX_COMMAND_MEM_INIT | (1 << 12);
 	val |= (BCM_PAGE_BITS - 8) << 16;
 	REG_WR(bp, BNX2_CTX_COMMAND, val);
+	for (i = 0; i < 10; i++) {
+		val = REG_RD(bp, BNX2_CTX_COMMAND);
+		if (!(val & BNX2_CTX_COMMAND_MEM_INIT))
+			break;
+		udelay(2);
+	}
+	if (val & BNX2_CTX_COMMAND_MEM_INIT)
+		return -EBUSY;
+
 	for (i = 0; i < bp->ctx_pages; i++) {
 		int j;
 
@@ -3696,9 +3705,11 @@ bnx2_init_chip(struct bnx2 *bp)
 
 	/* Initialize context mapping and zero out the quick contexts.  The
 	 * context block must have already been enabled. */
-	if (CHIP_NUM(bp) == CHIP_NUM_5709)
-		bnx2_init_5709_context(bp);
-	else
+	if (CHIP_NUM(bp) == CHIP_NUM_5709) {
+		rc = bnx2_init_5709_context(bp);
+		if (rc)
+			return rc;
+	} else
 		bnx2_init_context(bp);
 
 	if ((rc = bnx2_init_cpus(bp)) != 0)
-- 
cgit v1.1


From 0aa38df7cd5b6c5b89f5146f4a2286434bc4a8f3 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 4 Jun 2007 21:23:06 -0700
Subject: [BNX2]: Enable DMA on 5709.

Add missing code to enable DMA on 5709 A1.  The bit is a no-op on A0
and therefore can be set on all 5709 chips.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c | 5 +++++
 drivers/net/bnx2.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 9eba7a2..3b7ca2a 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -3815,6 +3815,11 @@ bnx2_init_chip(struct bnx2 *bp)
 	/* Initialize the receive filter. */
 	bnx2_set_rx_mode(bp->dev);
 
+	if (CHIP_NUM(bp) == CHIP_NUM_5709) {
+		val = REG_RD(bp, BNX2_MISC_NEW_CORE_CTL);
+		val |= BNX2_MISC_NEW_CORE_CTL_DMA_ENABLE;
+		REG_WR(bp, BNX2_MISC_NEW_CORE_CTL, val);
+	}
 	rc = bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT2 | BNX2_DRV_MSG_CODE_RESET,
 			  0);
 
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index bd6288d..49a5de2 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -1373,6 +1373,7 @@ struct l2_fhdr {
 #define BNX2_MISC_NEW_CORE_CTL				0x000008c8
 #define BNX2_MISC_NEW_CORE_CTL_LINK_HOLDOFF_SUCCESS	 (1L<<0)
 #define BNX2_MISC_NEW_CORE_CTL_LINK_HOLDOFF_REQ		 (1L<<1)
+#define BNX2_MISC_NEW_CORE_CTL_DMA_ENABLE		 (1L<<16)
 #define BNX2_MISC_NEW_CORE_CTL_RESERVED_CMN		 (0x3fffL<<2)
 #define BNX2_MISC_NEW_CORE_CTL_RESERVED_TC		 (0xffffL<<16)
 
-- 
cgit v1.1


From 02537b0676930b1bd9aff2139e0e645c79986931 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 4 Jun 2007 21:24:07 -0700
Subject: [BNX2]: Fix occasional counter corruption on 5708.

The statistics block DMA on 5708 can be messed up occasionally on the
average of about once per hour.  If the user is reading the counters
within one second after the corruption, the counters will be all
messed up.  One second later, the counters will be ok again until the
next corruption occurs.

The workaround is to disable the periodic statistics DMA.  Instead,
we manually trigger the DMA once a second in bnx2_timer().  This
manual trigger of the DMA avoids the problem.

As a consequence, we can only allow 0 or 1 second settings for
ethtool -C statistics block.

Thanks to Jean-Daniel Pauget <jd@disjunkt.com> and
CaT <cat@zip.com.au> for reporting this rare problem.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 3b7ca2a..5046b0f 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -3788,7 +3788,10 @@ bnx2_init_chip(struct bnx2 *bp)
 	REG_WR(bp, BNX2_HC_CMD_TICKS,
 	       (bp->cmd_ticks_int << 16) | bp->cmd_ticks);
 
-	REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks & 0xffff00);
+	if (CHIP_NUM(bp) == CHIP_NUM_5708)
+		REG_WR(bp, BNX2_HC_STATS_TICKS, 0);
+	else
+		REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks & 0xffff00);
 	REG_WR(bp, BNX2_HC_STAT_COLLECT_TICKS, 0xbb8);  /* 3ms */
 
 	if (CHIP_ID(bp) == CHIP_ID_5706_A1)
@@ -4641,6 +4644,11 @@ bnx2_timer(unsigned long data)
 
 	bp->stats_blk->stat_FwRxDrop = REG_RD_IND(bp, BNX2_FW_RX_DROP_COUNT);
 
+	/* workaround occasional corrupted counters */
+	if (CHIP_NUM(bp) == CHIP_NUM_5708 && bp->stats_ticks)
+		REG_WR(bp, BNX2_HC_COMMAND, bp->hc_cmd |
+					    BNX2_HC_COMMAND_STATS_NOW);
+
 	if (bp->phy_flags & PHY_SERDES_FLAG) {
 		if (CHIP_NUM(bp) == CHIP_NUM_5706)
 			bnx2_5706_serdes_timer(bp);
@@ -5438,6 +5446,10 @@ bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
 		0xff;
 
 	bp->stats_ticks = coal->stats_block_coalesce_usecs;
+	if (CHIP_NUM(bp) == CHIP_NUM_5708) {
+		if (bp->stats_ticks != 0 && bp->stats_ticks != USEC_PER_SEC)
+			bp->stats_ticks = USEC_PER_SEC;
+	}
 	if (bp->stats_ticks > 0xffff00) bp->stats_ticks = 0xffff00;
 	bp->stats_ticks &= 0xffff00;
 
-- 
cgit v1.1


From b91b9fd11210a7023f37eaee1e977ad9ce532095 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 4 Jun 2007 21:24:42 -0700
Subject: [BNX2]: Update version and reldate.

Update to version 1.5.11.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Acked-by: Jeff Garzik <jeff@garzik.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 5046b0f..ce3ed67 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -54,8 +54,8 @@
 
 #define DRV_MODULE_NAME		"bnx2"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"1.5.10"
-#define DRV_MODULE_RELDATE	"May 1, 2007"
+#define DRV_MODULE_VERSION	"1.5.11"
+#define DRV_MODULE_RELDATE	"June 4, 2007"
 
 #define RUN_AT(x) (jiffies + (x))
 
-- 
cgit v1.1


From f0e48dbfc5c74e967fea4c0fd0c5ad07557ae0c8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 4 Jun 2007 21:32:46 -0700
Subject: [TCP]: Honour sk_bound_dev_if in tcp_v4_send_ack

A time_wait socket inherits sk_bound_dev_if from the original socket,
but it is not used when sending ACK packets using ip_send_reply.

Fix by passing the oif to ip_send_reply in struct ip_reply_arg and
use it for output routing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h     | 1 +
 net/ipv4/ip_output.c | 4 +++-
 net/ipv4/tcp_ipv4.c  | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index bb207db..abf2820 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -143,6 +143,7 @@ struct ip_reply_arg {
 	__wsum 	    csum;
 	int	    csumoffset; /* u16 offset of csum in iov[0].iov_base */
 				/* -1 if not needed */ 
+	int	    bound_dev_if;
 }; 
 
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d6427d9..34ea454 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1352,7 +1352,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	}
 
 	{
-		struct flowi fl = { .nl_u = { .ip4_u =
+		struct flowi fl = { .oif = arg->bound_dev_if,
+				    .nl_u = { .ip4_u =
 					      { .daddr = daddr,
 						.saddr = rt->rt_spec_dst,
 						.tos = RT_TOS(ip_hdr(skb)->tos) } },
@@ -1376,6 +1377,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	inet->tos = ip_hdr(skb)->tos;
 	sk->sk_priority = skb->priority;
 	sk->sk_protocol = ip_hdr(skb)->protocol;
+	sk->sk_bound_dev_if = arg->bound_dev_if;
 	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
 		       &ipc, rt, MSG_DONTWAIT);
 	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 47c6105..97e294e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -705,6 +705,8 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 				      ip_hdr(skb)->saddr, /* XXX */
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+	if (twsk)
+		arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
 
 	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
 
-- 
cgit v1.1


From 8d76527e728d00d1cf9d5dd663caffb2dcf05ae6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 4 Jun 2007 23:34:08 -0700
Subject: [IPV4]: Only panic if inetdev_init fails for loopback

When I made the inetdev_init call work on all devices I incorrectly
left in the panic call as well.  It is obviously undesirable to
panic on an allocation failure for a normal network device.  This
patch moves the panic call under the loopback if clause.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7f95e6e..88a22d2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1057,9 +1057,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 	if (!in_dev) {
 		if (event == NETDEV_REGISTER) {
 			in_dev = inetdev_init(dev);
-			if (!in_dev)
-				panic("devinet: Failed to create loopback\n");
 			if (dev == &loopback_dev) {
+				if (!in_dev)
+					panic("devinet: "
+					      "Failed to create loopback\n");
 				in_dev->cnf.no_xfrm = 1;
 				in_dev->cnf.no_policy = 1;
 			}
-- 
cgit v1.1


From 42f811b8bcdf6695bf74de580b1daf53445e8949 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 4 Jun 2007 23:34:44 -0700
Subject: [IPV4]: Convert IPv4 devconf to an array

This patch converts the ipv4_devconf config members (everything except
sysctl) to an array.  This allows easier manipulation which will be
needed later on to provide better management of default config values.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/amso1100/c2.c |   2 +-
 include/linux/inetdevice.h          |  94 +++++++------
 net/ipv4/arp.c                      |  11 +-
 net/ipv4/devinet.c                  | 264 ++++++++++--------------------------
 net/ipv4/igmp.c                     |  18 +--
 net/ipv4/ipmr.c                     |  12 +-
 net/ipv4/proc.c                     |   2 +-
 net/ipv4/route.c                    |  14 +-
 net/ipv4/sysctl_net_ipv4.c          |   6 +-
 9 files changed, 163 insertions(+), 260 deletions(-)

diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 58bc272..0aecea6 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -672,7 +672,7 @@ static int c2_up(struct net_device *netdev)
 	 * rdma interface.
 	 */
 	in_dev = in_dev_get(netdev);
-	in_dev->cnf.arp_ignore = 1;
+	IN_DEV_CONF_SET(in_dev, ARP_IGNORE, 1);
 	in_dev_put(in_dev);
 
 	return 0;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index c0f7aec..1ef174d 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -10,28 +10,8 @@
 
 struct ipv4_devconf
 {
-	int	accept_redirects;
-	int	send_redirects;
-	int	secure_redirects;
-	int	shared_media;
-	int	accept_source_route;
-	int	rp_filter;
-	int	proxy_arp;
-	int	bootp_relay;
-	int	log_martians;
-	int	forwarding;
-	int	mc_forwarding;
-	int	tag;
-	int     arp_filter;
-	int	arp_announce;
-	int	arp_ignore;
-	int	arp_accept;
-	int	medium_id;
-	int	no_xfrm;
-	int	no_policy;
-	int	force_igmp_version;
-	int	promote_secondaries;
 	void	*sysctl;
+	int	data[__NET_IPV4_CONF_MAX - 1];
 };
 
 extern struct ipv4_devconf ipv4_devconf;
@@ -60,30 +40,64 @@ struct in_device
 	struct rcu_head		rcu_head;
 };
 
-#define IN_DEV_FORWARD(in_dev)		((in_dev)->cnf.forwarding)
-#define IN_DEV_MFORWARD(in_dev)		(ipv4_devconf.mc_forwarding && (in_dev)->cnf.mc_forwarding)
-#define IN_DEV_RPFILTER(in_dev)		(ipv4_devconf.rp_filter && (in_dev)->cnf.rp_filter)
-#define IN_DEV_SOURCE_ROUTE(in_dev)	(ipv4_devconf.accept_source_route && (in_dev)->cnf.accept_source_route)
-#define IN_DEV_BOOTP_RELAY(in_dev)	(ipv4_devconf.bootp_relay && (in_dev)->cnf.bootp_relay)
-
-#define IN_DEV_LOG_MARTIANS(in_dev)	(ipv4_devconf.log_martians || (in_dev)->cnf.log_martians)
-#define IN_DEV_PROXY_ARP(in_dev)	(ipv4_devconf.proxy_arp || (in_dev)->cnf.proxy_arp)
-#define IN_DEV_SHARED_MEDIA(in_dev)	(ipv4_devconf.shared_media || (in_dev)->cnf.shared_media)
-#define IN_DEV_TX_REDIRECTS(in_dev)	(ipv4_devconf.send_redirects || (in_dev)->cnf.send_redirects)
-#define IN_DEV_SEC_REDIRECTS(in_dev)	(ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
-#define IN_DEV_IDTAG(in_dev)		((in_dev)->cnf.tag)
-#define IN_DEV_MEDIUM_ID(in_dev)	((in_dev)->cnf.medium_id)
-#define IN_DEV_PROMOTE_SECONDARIES(in_dev)	(ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries)
+#define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1])
+#define IPV4_DEVCONF_ALL(attr) IPV4_DEVCONF(ipv4_devconf, attr)
+
+static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
+{
+	index--;
+	return in_dev->cnf.data[index];
+}
+
+static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
+				    int val)
+{
+	index--;
+	in_dev->cnf.data[index] = val;
+}
+
+#define IN_DEV_CONF_GET(in_dev, attr) \
+	ipv4_devconf_get((in_dev), NET_IPV4_CONF_ ## attr)
+#define IN_DEV_CONF_SET(in_dev, attr, val) \
+	ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val))
+
+#define IN_DEV_ANDCONF(in_dev, attr) \
+	(IPV4_DEVCONF_ALL(attr) && IN_DEV_CONF_GET((in_dev), attr))
+#define IN_DEV_ORCONF(in_dev, attr) \
+	(IPV4_DEVCONF_ALL(attr) || IN_DEV_CONF_GET((in_dev), attr))
+#define IN_DEV_MAXCONF(in_dev, attr) \
+	(max(IPV4_DEVCONF_ALL(attr), IN_DEV_CONF_GET((in_dev), attr)))
+
+#define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
+#define IN_DEV_MFORWARD(in_dev)		(IPV4_DEVCONF_ALL(MC_FORWARDING) && \
+					 IPV4_DEVCONF((in_dev)->cnf, \
+						      MC_FORWARDING))
+#define IN_DEV_RPFILTER(in_dev)		IN_DEV_ANDCONF((in_dev), RP_FILTER)
+#define IN_DEV_SOURCE_ROUTE(in_dev)	IN_DEV_ANDCONF((in_dev), \
+						       ACCEPT_SOURCE_ROUTE)
+#define IN_DEV_BOOTP_RELAY(in_dev)	IN_DEV_ANDCONF((in_dev), BOOTP_RELAY)
+
+#define IN_DEV_LOG_MARTIANS(in_dev)	IN_DEV_ORCONF((in_dev), LOG_MARTIANS)
+#define IN_DEV_PROXY_ARP(in_dev)	IN_DEV_ORCONF((in_dev), PROXY_ARP)
+#define IN_DEV_SHARED_MEDIA(in_dev)	IN_DEV_ORCONF((in_dev), SHARED_MEDIA)
+#define IN_DEV_TX_REDIRECTS(in_dev)	IN_DEV_ORCONF((in_dev), SEND_REDIRECTS)
+#define IN_DEV_SEC_REDIRECTS(in_dev)	IN_DEV_ORCONF((in_dev), \
+						      SECURE_REDIRECTS)
+#define IN_DEV_IDTAG(in_dev)		IN_DEV_CONF_GET(in_dev, TAG)
+#define IN_DEV_MEDIUM_ID(in_dev)	IN_DEV_CONF_GET(in_dev, MEDIUM_ID)
+#define IN_DEV_PROMOTE_SECONDARIES(in_dev) \
+					IN_DEV_ORCONF((in_dev), \
+						      PROMOTE_SECONDARIES)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects && (in_dev)->cnf.accept_redirects)) \
+	  IN_DEV_ANDCONF((in_dev), ACCEPT_REDIRECTS)) \
 	 || (!IN_DEV_FORWARD(in_dev) && \
-	  (ipv4_devconf.accept_redirects || (in_dev)->cnf.accept_redirects)))
+	  IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS)))
 
-#define IN_DEV_ARPFILTER(in_dev)	(ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter)
-#define IN_DEV_ARP_ANNOUNCE(in_dev)	(max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce))
-#define IN_DEV_ARP_IGNORE(in_dev)	(max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore))
+#define IN_DEV_ARPFILTER(in_dev)	IN_DEV_ORCONF((in_dev), ARPFILTER)
+#define IN_DEV_ARP_ANNOUNCE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE)
+#define IN_DEV_ARP_IGNORE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_IGNORE)
 
 struct in_ifaddr
 {
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7110779..e00767e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -877,7 +877,7 @@ static int arp_process(struct sk_buff *skb)
 
 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
 
-	if (ipv4_devconf.arp_accept) {
+	if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
 		/* Unsolicited ARP is not accepted by default.
 		   It is possible, that this option should be enabled for some
 		   devices (strip is candidate)
@@ -987,11 +987,11 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev)
 			return 0;
 		}
 		if (dev == NULL) {
-			ipv4_devconf.proxy_arp = 1;
+			IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
 			return 0;
 		}
 		if (__in_dev_get_rtnl(dev)) {
-			__in_dev_get_rtnl(dev)->cnf.proxy_arp = 1;
+			IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, 1);
 			return 0;
 		}
 		return -ENXIO;
@@ -1093,11 +1093,12 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
 			return pneigh_delete(&arp_tbl, &ip, dev);
 		if (mask == 0) {
 			if (dev == NULL) {
-				ipv4_devconf.proxy_arp = 0;
+				IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
 				return 0;
 			}
 			if (__in_dev_get_rtnl(dev)) {
-				__in_dev_get_rtnl(dev)->cnf.proxy_arp = 0;
+				IN_DEV_CONF_SET(__in_dev_get_rtnl(dev),
+						PROXY_ARP, 0);
 				return 0;
 			}
 			return -ENXIO;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 88a22d2..0094066 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,20 +64,26 @@
 #include <net/rtnetlink.h>
 
 struct ipv4_devconf ipv4_devconf = {
-	.accept_redirects = 1,
-	.send_redirects =  1,
-	.secure_redirects = 1,
-	.shared_media =	  1,
+	.data = {
+		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
+		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
+	},
 };
 
 static struct ipv4_devconf ipv4_devconf_dflt = {
-	.accept_redirects =  1,
-	.send_redirects =    1,
-	.secure_redirects =  1,
-	.shared_media =	     1,
-	.accept_source_route = 1,
+	.data = {
+		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
+		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
+		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
+	},
 };
 
+#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
+
 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
 	[IFA_LOCAL]     	= { .type = NLA_U32 },
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
@@ -1061,8 +1067,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 				if (!in_dev)
 					panic("devinet: "
 					      "Failed to create loopback\n");
-				in_dev->cnf.no_xfrm = 1;
-				in_dev->cnf.no_policy = 1;
+				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
+				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
 			}
 		}
 		goto out;
@@ -1241,10 +1247,10 @@ errout:
 void inet_forward_change(void)
 {
 	struct net_device *dev;
-	int on = ipv4_devconf.forwarding;
+	int on = IPV4_DEVCONF_ALL(FORWARDING);
 
-	ipv4_devconf.accept_redirects = !on;
-	ipv4_devconf_dflt.forwarding = on;
+	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
+	IPV4_DEVCONF_DFLT(FORWARDING) = on;
 
 	read_lock(&dev_base_lock);
 	for_each_netdev(dev) {
@@ -1252,7 +1258,7 @@ void inet_forward_change(void)
 		rcu_read_lock();
 		in_dev = __in_dev_get_rcu(dev);
 		if (in_dev)
-			in_dev->cnf.forwarding = on;
+			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
 		rcu_read_unlock();
 	}
 	read_unlock(&dev_base_lock);
@@ -1269,9 +1275,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write && *valp != val) {
-		if (valp == &ipv4_devconf.forwarding)
+		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
 			inet_forward_change();
-		else if (valp != &ipv4_devconf_dflt.forwarding)
+		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
 			rt_cache_flush(0);
 	}
 
@@ -1333,6 +1339,31 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 }
 
 
+#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
+	{ \
+		.ctl_name	= NET_IPV4_CONF_ ## attr, \
+		.procname	= name, \
+		.data		= ipv4_devconf.data + \
+				  NET_IPV4_CONF_ ## attr - 1, \
+		.maxlen		= sizeof(int), \
+		.mode		= mval, \
+		.proc_handler	= proc, \
+		.strategy	= sysctl, \
+	}
+
+#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
+	DEVINET_SYSCTL_ENTRY(attr, name, 0644, &proc_dointvec, NULL)
+
+#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
+	DEVINET_SYSCTL_ENTRY(attr, name, 0444, &proc_dointvec, NULL)
+
+#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
+	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
+
+#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
+	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
+				     ipv4_doint_and_flush_strategy)
+
 static struct devinet_sysctl_table {
 	struct ctl_table_header *sysctl_header;
 	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
@@ -1342,178 +1373,33 @@ static struct devinet_sysctl_table {
 	ctl_table		devinet_root_dir[2];
 } devinet_sysctl = {
 	.devinet_vars = {
-		{
-			.ctl_name	= NET_IPV4_CONF_FORWARDING,
-			.procname	= "forwarding",
-			.data		= &ipv4_devconf.forwarding,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &devinet_sysctl_forward,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_MC_FORWARDING,
-			.procname	= "mc_forwarding",
-			.data		= &ipv4_devconf.mc_forwarding,
-			.maxlen		= sizeof(int),
-			.mode		= 0444,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_ACCEPT_REDIRECTS,
-			.procname	= "accept_redirects",
-			.data		= &ipv4_devconf.accept_redirects,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_SECURE_REDIRECTS,
-			.procname	= "secure_redirects",
-			.data		= &ipv4_devconf.secure_redirects,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_SHARED_MEDIA,
-			.procname	= "shared_media",
-			.data		= &ipv4_devconf.shared_media,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_RP_FILTER,
-			.procname	= "rp_filter",
-			.data		= &ipv4_devconf.rp_filter,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_SEND_REDIRECTS,
-			.procname	= "send_redirects",
-			.data		= &ipv4_devconf.send_redirects,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
-			.procname	= "accept_source_route",
-			.data		= &ipv4_devconf.accept_source_route,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_PROXY_ARP,
-			.procname	= "proxy_arp",
-			.data		= &ipv4_devconf.proxy_arp,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_MEDIUM_ID,
-			.procname	= "medium_id",
-			.data		= &ipv4_devconf.medium_id,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_BOOTP_RELAY,
-			.procname	= "bootp_relay",
-			.data		= &ipv4_devconf.bootp_relay,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_LOG_MARTIANS,
-			.procname	= "log_martians",
-			.data		= &ipv4_devconf.log_martians,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_TAG,
-			.procname	= "tag",
-			.data		= &ipv4_devconf.tag,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_ARPFILTER,
-			.procname	= "arp_filter",
-			.data		= &ipv4_devconf.arp_filter,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_ARP_ANNOUNCE,
-			.procname	= "arp_announce",
-			.data		= &ipv4_devconf.arp_announce,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_ARP_IGNORE,
-			.procname	= "arp_ignore",
-			.data		= &ipv4_devconf.arp_ignore,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_ARP_ACCEPT,
-			.procname	= "arp_accept",
-			.data		= &ipv4_devconf.arp_accept,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &proc_dointvec,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_NOXFRM,
-			.procname	= "disable_xfrm",
-			.data		= &ipv4_devconf.no_xfrm,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &ipv4_doint_and_flush,
-			.strategy	= &ipv4_doint_and_flush_strategy,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_NOPOLICY,
-			.procname	= "disable_policy",
-			.data		= &ipv4_devconf.no_policy,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &ipv4_doint_and_flush,
-			.strategy	= &ipv4_doint_and_flush_strategy,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_FORCE_IGMP_VERSION,
-			.procname	= "force_igmp_version",
-			.data		= &ipv4_devconf.force_igmp_version,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &ipv4_doint_and_flush,
-			.strategy	= &ipv4_doint_and_flush_strategy,
-		},
-		{
-			.ctl_name	= NET_IPV4_CONF_PROMOTE_SECONDARIES,
-			.procname	= "promote_secondaries",
-			.data		= &ipv4_devconf.promote_secondaries,
-			.maxlen		= sizeof(int),
-			.mode		= 0644,
-			.proc_handler	= &ipv4_doint_and_flush,
-			.strategy	= &ipv4_doint_and_flush_strategy,
-		},
+		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
+					     devinet_sysctl_forward, NULL),
+		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
+
+		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
+		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
+		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
+		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
+		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
+		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
+					"accept_source_route"),
+		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
+		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
+		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
+		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
+		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
+		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
+		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
+		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
+		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+
+		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
+		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
+		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
+					      "force_igmp_version"),
+		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
+					      "promote_secondaries"),
 	},
 	.devinet_dev = {
 		{
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f4dd474..a646409 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -128,14 +128,16 @@
  * contradict to specs provided this delay is small enough.
  */
 
-#define IGMP_V1_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 1 || \
-		(in_dev)->cnf.force_igmp_version == 1 || \
-		((in_dev)->mr_v1_seen && \
-		time_before(jiffies, (in_dev)->mr_v1_seen)))
-#define IGMP_V2_SEEN(in_dev) (ipv4_devconf.force_igmp_version == 2 || \
-		(in_dev)->cnf.force_igmp_version == 2 || \
-		((in_dev)->mr_v2_seen && \
-		time_before(jiffies, (in_dev)->mr_v2_seen)))
+#define IGMP_V1_SEEN(in_dev) \
+	(IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \
+	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
+	 ((in_dev)->mr_v1_seen && \
+	  time_before(jiffies, (in_dev)->mr_v1_seen)))
+#define IGMP_V2_SEEN(in_dev) \
+	(IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \
+	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
+	 ((in_dev)->mr_v2_seen && \
+	  time_before(jiffies, (in_dev)->mr_v2_seen)))
 
 static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
 static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 0ebae41..d570d3a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -154,7 +154,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
 			in_dev = __in_dev_get_rtnl(dev);
 			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
 				goto failure;
-			in_dev->cnf.rp_filter = 0;
+			IN_DEV_CONF_SET(in_dev, RP_FILTER, 0);
 
 			if (dev_open(dev))
 				goto failure;
@@ -221,7 +221,7 @@ static struct net_device *ipmr_reg_vif(void)
 	if ((in_dev = inetdev_init(dev)) == NULL)
 		goto failure;
 
-	in_dev->cnf.rp_filter = 0;
+	IN_DEV_CONF_SET(in_dev, RP_FILTER, 0);
 
 	if (dev_open(dev))
 		goto failure;
@@ -281,7 +281,7 @@ static int vif_delete(int vifi)
 	dev_set_allmulti(dev, -1);
 
 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
-		in_dev->cnf.mc_forwarding--;
+		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
 		ip_rt_multicast_event(in_dev);
 	}
 
@@ -426,7 +426,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 
 	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 		return -EADDRNOTAVAIL;
-	in_dev->cnf.mc_forwarding++;
+	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
 	dev_set_allmulti(dev, +1);
 	ip_rt_multicast_event(in_dev);
 
@@ -841,7 +841,7 @@ static void mrtsock_destruct(struct sock *sk)
 {
 	rtnl_lock();
 	if (sk == mroute_socket) {
-		ipv4_devconf.mc_forwarding--;
+		IPV4_DEVCONF_ALL(MC_FORWARDING)--;
 
 		write_lock_bh(&mrt_lock);
 		mroute_socket=NULL;
@@ -890,7 +890,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
 			mroute_socket=sk;
 			write_unlock_bh(&mrt_lock);
 
-			ipv4_devconf.mc_forwarding++;
+			IPV4_DEVCONF_ALL(MC_FORWARDING)++;
 		}
 		rtnl_unlock();
 		return ret;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index cdbc6c1..3b690cf 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -260,7 +260,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
-			ipv4_devconf.forwarding ? 1 : 2, sysctl_ip_default_ttl);
+		   IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl);
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8603cfb..29ca63e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1636,7 +1636,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-	if (in_dev->cnf.no_policy)
+	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
 		rth->u.dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
@@ -1778,9 +1778,9 @@ static inline int __mkroute_input(struct sk_buff *skb,
 	if (res->fi->fib_nhs > 1)
 		rth->u.dst.flags |= DST_BALANCED;
 #endif
-	if (in_dev->cnf.no_policy)
+	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
 		rth->u.dst.flags |= DST_NOPOLICY;
-	if (out_dev->cnf.no_xfrm)
+	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
 		rth->u.dst.flags |= DST_NOXFRM;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
@@ -2021,7 +2021,7 @@ local_input:
 
 	atomic_set(&rth->u.dst.__refcnt, 1);
 	rth->u.dst.flags= DST_HOST;
-	if (in_dev->cnf.no_policy)
+	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
 		rth->u.dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
@@ -2218,9 +2218,9 @@ static inline int __mkroute_output(struct rtable **result,
 			rth->u.dst.flags |= DST_BALANCED;
 	}
 #endif
-	if (in_dev->cnf.no_xfrm)
+	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
 		rth->u.dst.flags |= DST_NOXFRM;
-	if (in_dev->cnf.no_policy)
+	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
 		rth->u.dst.flags |= DST_NOPOLICY;
 
 	rth->fl.fl4_dst	= oldflp->fl4_dst;
@@ -2759,7 +2759,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		__be32 dst = rt->rt_dst;
 
 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-		    ipv4_devconf.mc_forwarding) {
+		    IPV4_DEVCONF_ALL(MC_FORWARDING)) {
 			int err = ipmr_get_route(skb, r, nowait);
 			if (err <= 0) {
 				if (!nowait) {
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 6817d64..53ef0f4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -37,12 +37,12 @@ static
 int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int val = ipv4_devconf.forwarding;
+	int val = IPV4_DEVCONF_ALL(FORWARDING);
 	int ret;
 
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
-	if (write && ipv4_devconf.forwarding != val)
+	if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
 		inet_forward_change();
 
 	return ret;
@@ -222,7 +222,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_FORWARD,
 		.procname	= "ip_forward",
-		.data		= &ipv4_devconf.forwarding,
+		.data		= &IPV4_DEVCONF_ALL(FORWARDING),
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &ipv4_sysctl_forward,
-- 
cgit v1.1


From 31be308541e990592a2d0a3e77e8e51bd0cea0e0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 4 Jun 2007 23:35:37 -0700
Subject: [IPV4]: Add default config support after inetdev_init

Previously once inetdev_init has been called on a device any changes
made to ipv4_devconf_dflt would have no effect on that device's
configuration.

This creates a problem since we have moved the point where
inetdev_init is called from when an address is added to where the
device is registered.

This patch is the first half of a set that tries to mimic the old
behaviour while still calling inetdev_init.

It propagates any changes to ipv4_devconf_dflt to those devices that
have not had the corresponding attribute set.

The next patch will forcibly set all values at the point where
inetdev_init was previously called.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h |   3 +
 net/ipv4/devinet.c         | 133 +++++++++++++++++++++++++++++++++------------
 2 files changed, 101 insertions(+), 35 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 1ef174d..40adefd 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -3,6 +3,7 @@
 
 #ifdef __KERNEL__
 
+#include <linux/bitmap.h>
 #include <linux/if.h>
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
@@ -12,6 +13,7 @@ struct ipv4_devconf
 {
 	void	*sysctl;
 	int	data[__NET_IPV4_CONF_MAX - 1];
+	DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1);
 };
 
 extern struct ipv4_devconf ipv4_devconf;
@@ -53,6 +55,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
 				    int val)
 {
 	index--;
+	set_bit(index, in_dev->cnf.state);
 	in_dev->cnf.data[index] = val;
 }
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 0094066..e197347 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1244,6 +1244,91 @@ errout:
 
 #ifdef CONFIG_SYSCTL
 
+static void devinet_copy_dflt_conf(int i)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for_each_netdev(dev) {
+		struct in_device *in_dev;
+		rcu_read_lock();
+		in_dev = __in_dev_get_rcu(dev);
+		if (in_dev && !test_bit(i, in_dev->cnf.state))
+			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
+		rcu_read_unlock();
+	}
+	read_unlock(&dev_base_lock);
+}
+
+static int devinet_conf_proc(ctl_table *ctl, int write,
+			     struct file* filp, void __user *buffer,
+			     size_t *lenp, loff_t *ppos)
+{
+	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+	if (write) {
+		struct ipv4_devconf *cnf = ctl->extra1;
+		int i = (int *)ctl->data - cnf->data;
+
+		set_bit(i, cnf->state);
+
+		if (cnf == &ipv4_devconf_dflt)
+			devinet_copy_dflt_conf(i);
+	}
+
+	return ret;
+}
+
+static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
+			       void __user *oldval, size_t __user *oldlenp,
+			       void __user *newval, size_t newlen)
+{
+	struct ipv4_devconf *cnf;
+	int *valp = table->data;
+	int new;
+	int i;
+
+	if (!newval || !newlen)
+		return 0;
+
+	if (newlen != sizeof(int))
+		return -EINVAL;
+
+	if (get_user(new, (int __user *)newval))
+		return -EFAULT;
+
+	if (new == *valp)
+		return 0;
+
+	if (oldval && oldlenp) {
+		size_t len;
+
+		if (get_user(len, oldlenp))
+			return -EFAULT;
+
+		if (len) {
+			if (len > table->maxlen)
+				len = table->maxlen;
+			if (copy_to_user(oldval, valp, len))
+				return -EFAULT;
+			if (put_user(len, oldlenp))
+				return -EFAULT;
+		}
+	}
+
+	*valp = new;
+
+	cnf = table->extra1;
+	i = (int *)table->data - cnf->data;
+
+	set_bit(i, cnf->state);
+
+	if (cnf == &ipv4_devconf_dflt)
+		devinet_copy_dflt_conf(i);
+
+	return 1;
+}
+
 void inet_forward_change(void)
 {
 	struct net_device *dev;
@@ -1302,40 +1387,13 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 				  void __user *oldval, size_t __user *oldlenp,
 				  void __user *newval, size_t newlen)
 {
-	int *valp = table->data;
-	int new;
-
-	if (!newval || !newlen)
-		return 0;
-
-	if (newlen != sizeof(int))
-		return -EINVAL;
+	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
+				      newval, newlen);
 
-	if (get_user(new, (int __user *)newval))
-		return -EFAULT;
-
-	if (new == *valp)
-		return 0;
-
-	if (oldval && oldlenp) {
-		size_t len;
-
-		if (get_user(len, oldlenp))
-			return -EFAULT;
-
-		if (len) {
-			if (len > table->maxlen)
-				len = table->maxlen;
-			if (copy_to_user(oldval, valp, len))
-				return -EFAULT;
-			if (put_user(len, oldlenp))
-				return -EFAULT;
-		}
-	}
+	if (ret == 1)
+		rt_cache_flush(0);
 
-	*valp = new;
-	rt_cache_flush(0);
-	return 1;
+	return ret;
 }
 
 
@@ -1349,13 +1407,16 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
 		.mode		= mval, \
 		.proc_handler	= proc, \
 		.strategy	= sysctl, \
+		.extra1		= &ipv4_devconf, \
 	}
 
 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
-	DEVINET_SYSCTL_ENTRY(attr, name, 0644, &proc_dointvec, NULL)
+	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
+			     devinet_conf_sysctl)
 
 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
-	DEVINET_SYSCTL_ENTRY(attr, name, 0444, &proc_dointvec, NULL)
+	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
+			     devinet_conf_sysctl)
 
 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
@@ -1374,7 +1435,8 @@ static struct devinet_sysctl_table {
 } devinet_sysctl = {
 	.devinet_vars = {
 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
-					     devinet_sysctl_forward, NULL),
+					     devinet_sysctl_forward,
+					     devinet_conf_sysctl),
 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
 
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
@@ -1448,6 +1510,7 @@ static void devinet_sysctl_register(struct in_device *in_dev,
 		return;
 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
+		t->devinet_vars[i].extra1 = p;
 	}
 
 	if (dev) {
-- 
cgit v1.1


From 71e27da9618b5f4d525ec821def83991da20429f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 4 Jun 2007 23:36:06 -0700
Subject: [IPV4]: Restore old behaviour of default config values

Previously inet devices were only constructed when addresses are added
(or rarely in ipmr).  Therefore the default config values they get are
the ones at the time of these operations.

Now that we're creating inet devices earlier, this changes the
behaviour of default config values in an incompatible way (see bug
#8519).

This patch creates a compromise by setting the default values at the
same point as before but only for those that have not been explicitly
set by the user since the inet device's creation.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h |  6 +++++-
 net/ipv4/devinet.c         | 19 ++++++++-----------
 net/ipv4/ipmr.c            | 15 +++++++++++----
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 40adefd..ae04901 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -59,6 +59,11 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index,
 	in_dev->cnf.data[index] = val;
 }
 
+static inline void ipv4_devconf_setall(struct in_device *in_dev)
+{
+	bitmap_fill(in_dev->cnf.state, __NET_IPV4_CONF_MAX - 1);
+}
+
 #define IN_DEV_CONF_GET(in_dev, attr) \
 	ipv4_devconf_get((in_dev), NET_IPV4_CONF_ ## attr)
 #define IN_DEV_CONF_SET(in_dev, attr, val) \
@@ -125,7 +130,6 @@ extern struct net_device 	*ip_dev_find(__be32 addr);
 extern int		inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
 extern int		devinet_ioctl(unsigned int cmd, void __user *);
 extern void		devinet_init(void);
-extern struct in_device *inetdev_init(struct net_device *dev);
 extern struct in_device	*inetdev_by_index(int);
 extern __be32		inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
 extern __be32		inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e197347..354e800 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -147,7 +147,7 @@ void in_dev_finish_destroy(struct in_device *idev)
 	}
 }
 
-struct in_device *inetdev_init(struct net_device *dev)
+static struct in_device *inetdev_init(struct net_device *dev)
 {
 	struct in_device *in_dev;
 
@@ -405,12 +405,10 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 	ASSERT_RTNL();
 
 	if (!in_dev) {
-		in_dev = inetdev_init(dev);
-		if (!in_dev) {
-			inet_free_ifa(ifa);
-			return -ENOBUFS;
-		}
+		inet_free_ifa(ifa);
+		return -ENOBUFS;
 	}
+	ipv4_devconf_setall(in_dev);
 	if (ifa->ifa_dev != in_dev) {
 		BUG_TRAP(!ifa->ifa_dev);
 		in_dev_hold(in_dev);
@@ -520,13 +518,12 @@ static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
 
 	in_dev = __in_dev_get_rtnl(dev);
 	if (in_dev == NULL) {
-		in_dev = inetdev_init(dev);
-		if (in_dev == NULL) {
-			err = -ENOBUFS;
-			goto errout;
-		}
+		err = -ENOBUFS;
+		goto errout;
 	}
 
+	ipv4_devconf_setall(in_dev);
+
 	ifa = inet_alloc_ifa();
 	if (ifa == NULL) {
 		/*
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d570d3a..d96582a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -152,9 +152,11 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
 			dev->flags |= IFF_MULTICAST;
 
 			in_dev = __in_dev_get_rtnl(dev);
-			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
+			if (in_dev == NULL)
 				goto failure;
-			IN_DEV_CONF_SET(in_dev, RP_FILTER, 0);
+
+			ipv4_devconf_setall(in_dev);
+			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 
 			if (dev_open(dev))
 				goto failure;
@@ -218,10 +220,15 @@ static struct net_device *ipmr_reg_vif(void)
 	}
 	dev->iflink = 0;
 
-	if ((in_dev = inetdev_init(dev)) == NULL)
+	rcu_read_lock();
+	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
+		rcu_read_unlock();
 		goto failure;
+	}
 
-	IN_DEV_CONF_SET(in_dev, RP_FILTER, 0);
+	ipv4_devconf_setall(in_dev);
+	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
+	rcu_read_unlock();
 
 	if (dev_open(dev))
 		goto failure;
-- 
cgit v1.1


From c36befb52350fa76bf3cb03d13ba2e17dbe8a508 Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <IvDoorn@gmail.com>
Date: Tue, 5 Jun 2007 00:04:46 -0700
Subject: [RFKILL]: Make rfkill->name const

The rfkill name can be made const safely,
this makes the compiler happy when drivers make
it point to some const string used elsewhere.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rfkill.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 7c1ffba..a8a6ea8 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -63,7 +63,7 @@ enum rfkill_state {
  * This structure represents a RF switch located on a network device.
  */
 struct rfkill {
-	char *name;
+	const char *name;
 	enum rfkill_type type;
 
 	enum rfkill_state state;
-- 
cgit v1.1


From c4b1010f406d7c3f819c22a6323c46776d5b148c Mon Sep 17 00:00:00 2001
From: Denis Cheng <crquan@gmail.com>
Date: Tue, 5 Jun 2007 00:06:57 -0700
Subject: [NET]: Merge dst_discard_in and dst_discard_out.

Signed-off-by: Denis Cheng <crquan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/net/core/dst.c b/net/core/dst.c
index 764bccb..c6a05879 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -111,13 +111,7 @@ out:
 	spin_unlock(&dst_lock);
 }
 
-static int dst_discard_in(struct sk_buff *skb)
-{
-	kfree_skb(skb);
-	return 0;
-}
-
-static int dst_discard_out(struct sk_buff *skb)
+static int dst_discard(struct sk_buff *skb)
 {
 	kfree_skb(skb);
 	return 0;
@@ -138,8 +132,7 @@ void * dst_alloc(struct dst_ops * ops)
 	dst->ops = ops;
 	dst->lastuse = jiffies;
 	dst->path = dst;
-	dst->input = dst_discard_in;
-	dst->output = dst_discard_out;
+	dst->input = dst->output = dst_discard;
 #if RT_CACHE_DEBUG >= 2
 	atomic_inc(&dst_total);
 #endif
@@ -153,8 +146,7 @@ static void ___dst_free(struct dst_entry * dst)
 	   protocol module is unloaded.
 	 */
 	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
-		dst->input = dst_discard_in;
-		dst->output = dst_discard_out;
+		dst->input = dst->output = dst_discard;
 	}
 	dst->obsolete = 2;
 }
@@ -242,8 +234,7 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 		return;
 
 	if (!unregister) {
-		dst->input = dst_discard_in;
-		dst->output = dst_discard_out;
+		dst->input = dst->output = dst_discard;
 	} else {
 		dst->dev = &loopback_dev;
 		dev_hold(&loopback_dev);
-- 
cgit v1.1


From 274707cff9810b784c548ed169298617a1bc3528 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 5 Jun 2007 00:16:12 -0700
Subject: [TCP]: Use LIMIT_NETDEBUG in tcp_retransmit_timer().

LIMIT_NETDEBUG allows the admin to disable some warning messages (echo 0
 >/proc/sys/net/core/warnings).

The "TCP: Treason uncloaked!" message can use this facility.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e613401..e9b151b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -292,9 +292,9 @@ static void tcp_retransmit_timer(struct sock *sk)
 		 * we cannot allow such beasts to hang infinitely.
 		 */
 #ifdef TCP_DEBUG
-		if (net_ratelimit()) {
+		if (1) {
 			struct inet_sock *inet = inet_sk(sk);
-			printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
+			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
 			       NIPQUAD(inet->daddr), ntohs(inet->dport),
 			       inet->num, tp->snd_una, tp->snd_nxt);
 		}
-- 
cgit v1.1


From 14a49e1fd2bb91ba2bf0e1f06711b6dbc21de02d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 5 Jun 2007 00:19:24 -0700
Subject: [TCP] tcp_probe: Attach printf attribute properly to printl().

GCC doesn't like the way Stephen initially did it:

net/ipv4/tcp_probe.c:83: warning: empty declaration

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_probe.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 760165a..d9323df 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -63,6 +63,9 @@ struct {
  * FIXME: causes an extra copy
  */
 static void printl(const char *fmt, ...)
+	__attribute__ ((format (printf, 1, 2)));
+
+static void printl(const char *fmt, ...)
 {
 	va_list args;
 	int len;
@@ -80,8 +83,7 @@ static void printl(const char *fmt, ...)
 
 	kfifo_put(tcpw.fifo, tbuf, len);
 	wake_up(&tcpw.wait);
-} __attribute__ ((format (printf, 1, 2)));
-
+}
 
 /*
  * Hook inserted to be called before each receive packet.
-- 
cgit v1.1


From ef7c79ed645f52bcbdd88f8d54a9702c4d3fd15d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 5 Jun 2007 12:38:30 -0700
Subject: [NETLINK]: Mark netlink policies const

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h           |  2 +-
 include/net/genetlink.h           |  2 +-
 include/net/ip_fib.h              |  2 +-
 include/net/netlink.h             | 12 ++++++------
 net/core/neighbour.c              |  4 ++--
 net/core/rtnetlink.c              |  2 +-
 net/decnet/dn_dev.c               |  2 +-
 net/decnet/dn_rules.c             |  2 +-
 net/ipv4/devinet.c                |  2 +-
 net/ipv4/fib_frontend.c           |  2 +-
 net/ipv4/fib_rules.c              |  2 +-
 net/ipv6/addrconf.c               |  2 +-
 net/ipv6/fib6_rules.c             |  2 +-
 net/ipv6/route.c                  |  2 +-
 net/netlabel/netlabel_cipso_v4.c  |  2 +-
 net/netlabel/netlabel_mgmt.c      |  2 +-
 net/netlabel/netlabel_unlabeled.c |  2 +-
 net/netlink/attr.c                |  8 ++++----
 net/netlink/genetlink.c           |  2 +-
 19 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index ed3a887..83e41dd 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -64,7 +64,7 @@ struct fib_rules_ops
 	void			(*flush_cache)(void);
 
 	int			nlgroup;
-	struct nla_policy	*policy;
+	const struct nla_policy	*policy;
 	struct list_head	*rules_list;
 	struct module		*owner;
 };
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index adff4c8..b6eaca1 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -60,7 +60,7 @@ struct genl_ops
 {
 	u8			cmd;
 	unsigned int		flags;
-	struct nla_policy	*policy;
+	const struct nla_policy	*policy;
 	int		       (*doit)(struct sk_buff *skb,
 				       struct genl_info *info);
 	int		       (*dumpit)(struct sk_buff *skb,
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 5a4a0366..69252cb 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -213,7 +213,7 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
 /* Exported by fib_frontend.c */
-extern struct nla_policy rtm_ipv4_policy[];
+extern const struct nla_policy rtm_ipv4_policy[];
 extern void		ip_fib_init(void);
 extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 			       struct net_device *dev, __be32 *spec_dst, u32 *itag);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 0bf325c..7b510a9 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -222,10 +222,10 @@ extern int		nlmsg_notify(struct sock *sk, struct sk_buff *skb,
 				     gfp_t flags);
 
 extern int		nla_validate(struct nlattr *head, int len, int maxtype,
-				     struct nla_policy *policy);
+				     const struct nla_policy *policy);
 extern int		nla_parse(struct nlattr *tb[], int maxtype,
 				  struct nlattr *head, int len,
-				  struct nla_policy *policy);
+				  const struct nla_policy *policy);
 extern struct nlattr *	nla_find(struct nlattr *head, int len, int attrtype);
 extern size_t		nla_strlcpy(char *dst, const struct nlattr *nla,
 				    size_t dstsize);
@@ -360,7 +360,7 @@ static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining)
  */
 static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen,
 			      struct nlattr *tb[], int maxtype,
-			      struct nla_policy *policy)
+			      const struct nla_policy *policy)
 {
 	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
 		return -EINVAL;
@@ -392,7 +392,7 @@ static inline struct nlattr *nlmsg_find_attr(struct nlmsghdr *nlh,
  * @policy: validation policy
  */
 static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype,
-				 struct nla_policy *policy)
+				 const struct nla_policy *policy)
 {
 	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
 		return -EINVAL;
@@ -729,7 +729,7 @@ static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype)
  */
 static inline int nla_parse_nested(struct nlattr *tb[], int maxtype,
 				   struct nlattr *nla,
-				   struct nla_policy *policy)
+				   const struct nla_policy *policy)
 {
 	return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
 }
@@ -990,7 +990,7 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
  * Returns 0 on success or a negative error code.
  */
 static inline int nla_validate_nested(struct nlattr *start, int maxtype,
-				      struct nla_policy *policy)
+				      const struct nla_policy *policy)
 {
 	return nla_validate(nla_data(start), nla_len(start), maxtype, policy);
 }
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6f3bb73..9df26a0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1761,7 +1761,7 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
 	return NULL;
 }
 
-static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
+static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
 	[NDTA_NAME]		= { .type = NLA_STRING },
 	[NDTA_THRESH1]		= { .type = NLA_U32 },
 	[NDTA_THRESH2]		= { .type = NLA_U32 },
@@ -1770,7 +1770,7 @@ static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
 	[NDTA_PARMS]		= { .type = NLA_NESTED },
 };
 
-static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
+static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 27da9cd..a8a5093 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -551,7 +551,7 @@ cont:
 	return skb->len;
 }
 
-static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = {
+static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
 	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
 	[IFLA_MTU]		= { .type = NLA_U32 },
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 764a56a..ab41c18 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -638,7 +638,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex)
 	return dn_dev;
 }
 
-static struct nla_policy dn_ifa_policy[IFA_MAX+1] __read_mostly = {
+static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
 	[IFA_ADDRESS]		= { .type = NLA_U16 },
 	[IFA_LOCAL]		= { .type = NLA_U16 },
 	[IFA_LABEL]		= { .type = NLA_STRING,
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 17a1932..84ff3dd 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -108,7 +108,7 @@ errout:
 	return err;
 }
 
-static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
+static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = {
 	FRA_GENERIC_POLICY,
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 354e800..fa97b96 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -84,7 +84,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
 
 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
 
-static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
+static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LOCAL]     	= { .type = NLA_U32 },
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 9ad1f62..311d633 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -441,7 +441,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
 	return -EINVAL;
 }
 
-struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = {
+const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 	[RTA_DST]		= { .type = NLA_U32 },
 	[RTA_SRC]		= { .type = NLA_U32 },
 	[RTA_IIF]		= { .type = NLA_U32 },
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 33083ad..2a94784 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -169,7 +169,7 @@ static struct fib_table *fib_empty_table(void)
 	return NULL;
 }
 
-static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
+static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
 	FRA_GENERIC_POLICY,
 	[FRA_FLOW]	= { .type = NLA_U32 },
 };
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 329de67..5a5f8bd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2990,7 +2990,7 @@ static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
 	return pfx;
 }
 
-static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = {
+static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
 	[IFA_ADDRESS]		= { .len = sizeof(struct in6_addr) },
 	[IFA_LOCAL]		= { .len = sizeof(struct in6_addr) },
 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index fc3882c..53b3998 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -157,7 +157,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	return 1;
 }
 
-static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
+static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
 	FRA_GENERIC_POLICY,
 };
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1324b06..fe8d983 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1999,7 +1999,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 	fib6_clean_all(rt6_mtu_change_route, 0, &arg);
 }
 
-static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
+static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 	[RTA_OIF]               = { .type = NLA_U32 },
 	[RTA_IIF]		= { .type = NLA_U32 },
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 07e47db..24b660f 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -59,7 +59,7 @@ static struct genl_family netlbl_cipsov4_gnl_family = {
 };
 
 /* NetLabel Netlink attribute policy */
-static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
+static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
 	[NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
 	[NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 },
 	[NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 },
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index e8c80f3..e00fc21 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -59,7 +59,7 @@ static struct genl_family netlbl_mgmt_gnl_family = {
 };
 
 /* NetLabel Netlink attribute policy */
-static struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
+static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
 	[NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING },
 	[NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 },
 	[NLBL_MGMT_A_VERSION] = { .type = NLA_U32 },
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index b931ede..5c303c68a 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -61,7 +61,7 @@ static struct genl_family netlbl_unlabel_gnl_family = {
 };
 
 /* NetLabel Netlink attribute policy */
-static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
+static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
 	[NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 },
 };
 
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index df5f820..c591212 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -24,9 +24,9 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
 };
 
 static int validate_nla(struct nlattr *nla, int maxtype,
-			struct nla_policy *policy)
+			const struct nla_policy *policy)
 {
-	struct nla_policy *pt;
+	const struct nla_policy *pt;
 	int minlen = 0, attrlen = nla_len(nla);
 
 	if (nla->nla_type <= 0 || nla->nla_type > maxtype)
@@ -99,7 +99,7 @@ static int validate_nla(struct nlattr *nla, int maxtype,
  * Returns 0 on success or a negative error code.
  */
 int nla_validate(struct nlattr *head, int len, int maxtype,
-		 struct nla_policy *policy)
+		 const struct nla_policy *policy)
 {
 	struct nlattr *nla;
 	int rem, err;
@@ -130,7 +130,7 @@ errout:
  * Returns 0 on success or a negative error code.
  */
 int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
-	      struct nla_policy *policy)
+	      const struct nla_policy *policy)
 {
 	struct nlattr *nla;
 	int rem, err;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 6e31234..b9ab62f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -472,7 +472,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid,
 	return skb;
 }
 
-static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = {
+static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = {
 	[CTRL_ATTR_FAMILY_ID]	= { .type = NLA_U16 },
 	[CTRL_ATTR_FAMILY_NAME]	= { .type = NLA_NUL_STRING,
 				    .len = GENL_NAMSIZ - 1 },
-- 
cgit v1.1


From 51055be81c3cb14d0165a7432b787098b817fd35 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 5 Jun 2007 12:40:01 -0700
Subject: [RTNETLINK]: ifindex 0 does not exist

ifindex == 0 does not exist and implies we should do a lookup by name if
one was given.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a8a5093..02e8bf0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -580,7 +580,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	err = -EINVAL;
 	ifm = nlmsg_data(nlh);
-	if (ifm->ifi_index >= 0)
+	if (ifm->ifi_index > 0)
 		dev = dev_get_by_index(ifm->ifi_index);
 	else if (tb[IFLA_IFNAME])
 		dev = dev_get_by_name(ifname);
@@ -672,7 +672,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	 * name provided implies that a name change has been
 	 * requested.
 	 */
-	if (ifm->ifi_index >= 0 && ifname[0]) {
+	if (ifm->ifi_index > 0 && ifname[0]) {
 		err = dev_change_name(dev, ifname);
 		if (err < 0)
 			goto errout_dev;
@@ -740,7 +740,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		return err;
 
 	ifm = nlmsg_data(nlh);
-	if (ifm->ifi_index >= 0) {
+	if (ifm->ifi_index > 0) {
 		dev = dev_get_by_index(ifm->ifi_index);
 		if (dev == NULL)
 			return -ENODEV;
-- 
cgit v1.1


From 3c158f7f57601bc27eab82f0dc4fd3fad314d845 Mon Sep 17 00:00:00 2001
From: Patrick McHarrdy <kaber@trash.net>
Date: Tue, 5 Jun 2007 12:55:27 -0700
Subject: [NETFILTER]: nf_conntrack: fix helper module unload races

When a helper module is unloaded all conntracks refering to it have their
helper pointer NULLed out, leading to lots of races. In most places this
can be fixed by proper use of RCU (they do already check for != NULL,
but in a racy way), additionally nf_conntrack_expect_related needs to
bail out when no helper is present.

Also remove two paranoid BUG_ONs in nf_conntrack_proto_gre that are racy
and not worth fixing.

Signed-off-by: Patrick McHarrdy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 13 ++++++----
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  9 +++++--
 net/netfilter/nf_conntrack_core.c              | 26 ++++++++++++++------
 net/netfilter/nf_conntrack_expect.c            |  4 +++
 net/netfilter/nf_conntrack_helper.c            |  2 +-
 net/netfilter/nf_conntrack_netlink.c           | 34 +++++++++++++++++---------
 net/netfilter/nf_conntrack_proto_gre.c         |  2 --
 7 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fd62a41..6dc72a8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -133,6 +133,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn_help *help;
+	struct nf_conntrack_helper *helper;
 
 	/* This is where we call the helper: as the packet goes out. */
 	ct = nf_ct_get(*pskb, &ctinfo);
@@ -140,12 +141,14 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	help = nfct_help(ct);
-	if (!help || !help->helper)
+	if (!help)
 		return NF_ACCEPT;
-
-	return help->helper->help(pskb,
-				  skb_network_offset(*pskb) + ip_hdrlen(*pskb),
-				  ct, ctinfo);
+	/* rcu_read_lock()ed by nf_hook_slow */
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		return NF_ACCEPT;
+	return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+			    ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index dc442fb..1b1797f 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -160,6 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 {
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
+	struct nf_conntrack_helper *helper;
 	enum ip_conntrack_info ctinfo;
 	unsigned int ret, protoff;
 	unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
@@ -172,7 +173,11 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 		goto out;
 
 	help = nfct_help(ct);
-	if (!help || !help->helper)
+	if (!help)
+		goto out;
+	/* rcu_read_lock()ed by nf_hook_slow */
+	helper = rcu_dereference(help->helper);
+	if (!helper)
 		goto out;
 
 	protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
@@ -182,7 +187,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 		return NF_ACCEPT;
 	}
 
-	ret = help->helper->help(pskb, protoff, ct, ctinfo);
+	ret = helper->help(pskb, protoff, ct, ctinfo);
 	if (ret != NF_ACCEPT)
 		return ret;
 out:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 483e927..7a15e30 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -350,9 +350,15 @@ static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
 	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_helper *helper;
 
-	if (help && help->helper && help->helper->destroy)
-		help->helper->destroy(ct);
+	if (help) {
+		rcu_read_lock();
+		helper = rcu_dereference(help->helper);
+		if (helper && helper->destroy)
+			helper->destroy(ct);
+		rcu_read_unlock();
+	}
 
 	write_lock_bh(&nf_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
@@ -661,6 +667,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	       unsigned int dataoff)
 {
 	struct nf_conn *conntrack;
+	struct nf_conn_help *help;
 	struct nf_conntrack_tuple repl_tuple;
 	struct nf_conntrack_expect *exp;
 	u_int32_t features = 0;
@@ -691,6 +698,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 	write_lock_bh(&nf_conntrack_lock);
 	exp = find_expectation(tuple);
 
+	help = nfct_help(conntrack);
 	if (exp) {
 		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
 			conntrack, exp);
@@ -698,7 +706,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
 		conntrack->master = exp->master;
 		if (exp->helper)
-			nfct_help(conntrack)->helper = exp->helper;
+			rcu_assign_pointer(help->helper, exp->helper);
 #ifdef CONFIG_NF_CONNTRACK_MARK
 		conntrack->mark = exp->master->mark;
 #endif
@@ -708,10 +716,11 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		nf_conntrack_get(&conntrack->master->ct_general);
 		NF_CT_STAT_INC(expect_new);
 	} else {
-		struct nf_conn_help *help = nfct_help(conntrack);
-
-		if (help)
-			help->helper = __nf_ct_helper_find(&repl_tuple);
+		if (help) {
+			/* not in hash table yet, so not strictly necessary */
+			rcu_assign_pointer(help->helper,
+					   __nf_ct_helper_find(&repl_tuple));
+		}
 		NF_CT_STAT_INC(new);
 	}
 
@@ -893,7 +902,8 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 		helper = __nf_ct_helper_find(newreply);
 		if (helper)
 			memset(&help->help, 0, sizeof(help->help));
-		help->helper = helper;
+		/* not in hash table yet, so not strictly necessary */
+		rcu_assign_pointer(help->helper, helper);
 	}
 	write_unlock_bh(&nf_conntrack_lock);
 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 117cbfd..504fb6c 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -337,6 +337,10 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
 	NF_CT_ASSERT(master_help);
 
 	write_lock_bh(&nf_conntrack_lock);
+	if (!master_help->helper) {
+		ret = -ESHUTDOWN;
+		goto out;
+	}
 	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
 		if (expect_matches(i, expect)) {
 			/* Refresh timer: if it's dying, ignore.. */
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0743be4..f868b7f 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -93,7 +93,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 
 	if (help && help->helper == me) {
 		nf_conntrack_event(IPCT_HELPER, ct);
-		help->helper = NULL;
+		rcu_assign_pointer(help->helper, NULL);
 	}
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d6d39e2..3f73327 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -171,21 +171,29 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
 {
 	struct nfattr *nest_helper;
 	const struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_helper *helper;
 
-	if (!help || !help->helper)
+	if (!help)
 		return 0;
 
+	rcu_read_lock();
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		goto out;
+
 	nest_helper = NFA_NEST(skb, CTA_HELP);
-	NFA_PUT(skb, CTA_HELP_NAME, strlen(help->helper->name), help->helper->name);
+	NFA_PUT(skb, CTA_HELP_NAME, strlen(helper->name), helper->name);
 
-	if (help->helper->to_nfattr)
-		help->helper->to_nfattr(skb, ct);
+	if (helper->to_nfattr)
+		helper->to_nfattr(skb, ct);
 
 	NFA_NEST_END(skb, nest_helper);
-
+out:
+	rcu_read_unlock();
 	return 0;
 
 nfattr_failure:
+	rcu_read_unlock();
 	return -1;
 }
 
@@ -842,7 +850,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
 		if (help && help->helper) {
 			/* we had a helper before ... */
 			nf_ct_remove_expectations(ct);
-			help->helper = NULL;
+			rcu_assign_pointer(help->helper, NULL);
 		}
 
 		return 0;
@@ -866,7 +874,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
 
 	/* need to zero data of old helper */
 	memset(&help->help, 0, sizeof(help->help));
-	help->helper = helper;
+	rcu_assign_pointer(help->helper, helper);
 
 	return 0;
 }
@@ -950,6 +958,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 	struct nf_conn *ct;
 	int err = -EINVAL;
 	struct nf_conn_help *help;
+	struct nf_conntrack_helper *helper = NULL;
 
 	ct = nf_conntrack_alloc(otuple, rtuple);
 	if (ct == NULL || IS_ERR(ct))
@@ -980,14 +989,17 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 #endif
 
 	help = nfct_help(ct);
-	if (help)
-		help->helper = nf_ct_helper_find_get(rtuple);
+	if (help) {
+		helper = nf_ct_helper_find_get(rtuple);
+		/* not in hash table yet so not strictly necessary */
+		rcu_assign_pointer(help->helper, helper);
+	}
 
 	add_timer(&ct->timeout);
 	nf_conntrack_hash_insert(ct);
 
-	if (help && help->helper)
-		nf_ct_helper_put(help->helper);
+	if (helper)
+		nf_ct_helper_put(helper);
 
 	return 0;
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 5434472..339c397 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -100,7 +100,6 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_ct_gre_keymap **kmp, *km;
 
-	BUG_ON(strcmp(help->helper->name, "pptp"));
 	kmp = &help->help.ct_pptp_info.keymap[dir];
 	if (*kmp) {
 		/* check whether it's a retransmission */
@@ -137,7 +136,6 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 	enum ip_conntrack_dir dir;
 
 	DEBUGP("entering for ct %p\n", ct);
-	BUG_ON(strcmp(help->helper->name, "pptp"));
 
 	write_lock_bh(&nf_ct_gre_lock);
 	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
-- 
cgit v1.1


From 4c1b52bc7a2f5ee01ea3fc248a8748a1c6843f7c Mon Sep 17 00:00:00 2001
From: Dmitry Mishin <dim@openvz.org>
Date: Tue, 5 Jun 2007 12:56:09 -0700
Subject: [NETFILTER]: ip_tables: fix compat related crash

check_compat_entry_size_and_hooks iterates over the matches and calls
compat_check_calc_match, which loads the match and calculates the
compat offsets, but unlike the non-compat version, doesn't call
->checkentry yet. On error however it calls cleanup_matches, which in
turn calls ->destroy, which can result in crashes if the destroy
function (validly) expects to only get called after the checkentry
function.

Add a compat_release_match function that only drops the module reference
on error and rename compat_check_calc_match to compat_find_calc_match to
reflect the fact that it doesn't call the checkentry function.

Reported by Jan Engelhardt <jengelh@linux01.gwdg.de>

Signed-off-by: Dmitry Mishin <dim@openvz.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_tables.h | 20 ++++++++
 net/ipv4/netfilter/ip_tables.c           | 81 +++++++++++++++++++++++++-------
 2 files changed, 83 insertions(+), 18 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 2f46dd7..e992cd6 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -264,6 +264,26 @@ ipt_get_target(struct ipt_entry *e)
 	__ret;							\
 })
 
+/* fn returns 0 to continue iteration */
+#define IPT_ENTRY_ITERATE_CONTINUE(entries, size, n, fn, args...) \
+({								\
+	unsigned int __i, __n;					\
+	int __ret = 0;						\
+	struct ipt_entry *__entry;				\
+								\
+	for (__i = 0, __n = 0; __i < (size);			\
+	     __i += __entry->next_offset, __n++) { 		\
+		__entry = (void *)(entries) + __i;		\
+		if (__n < n)					\
+			continue;				\
+								\
+		__ret = fn(__entry , ## args);			\
+		if (__ret != 0)					\
+			break;					\
+	}							\
+	__ret;							\
+})
+
 /*
  *	Main firewall chains definitions and global var's definitions.
  */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e3f83bf..9bacf1a0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -499,7 +499,8 @@ check_entry(struct ipt_entry *e, const char *name)
 }
 
 static inline int check_match(struct ipt_entry_match *m, const char *name,
-				const struct ipt_ip *ip, unsigned int hookmask)
+				const struct ipt_ip *ip, unsigned int hookmask,
+				unsigned int *i)
 {
 	struct xt_match *match;
 	int ret;
@@ -515,6 +516,8 @@ static inline int check_match(struct ipt_entry_match *m, const char *name,
 			 m->u.kernel.match->name);
 		ret = -EINVAL;
 	}
+	if (!ret)
+		(*i)++;
 	return ret;
 }
 
@@ -537,11 +540,10 @@ find_check_match(struct ipt_entry_match *m,
 	}
 	m->u.kernel.match = match;
 
-	ret = check_match(m, name, ip, hookmask);
+	ret = check_match(m, name, ip, hookmask, i);
 	if (ret)
 		goto err;
 
-	(*i)++;
 	return 0;
 err:
 	module_put(m->u.kernel.match->me);
@@ -1425,7 +1427,7 @@ out:
 }
 
 static inline int
-compat_check_calc_match(struct ipt_entry_match *m,
+compat_find_calc_match(struct ipt_entry_match *m,
 	    const char *name,
 	    const struct ipt_ip *ip,
 	    unsigned int hookmask,
@@ -1449,6 +1451,31 @@ compat_check_calc_match(struct ipt_entry_match *m,
 }
 
 static inline int
+compat_release_match(struct ipt_entry_match *m, unsigned int *i)
+{
+	if (i && (*i)-- == 0)
+		return 1;
+
+	module_put(m->u.kernel.match->me);
+	return 0;
+}
+
+static inline int
+compat_release_entry(struct ipt_entry *e, unsigned int *i)
+{
+	struct ipt_entry_target *t;
+
+	if (i && (*i)-- == 0)
+		return 1;
+
+	/* Cleanup all matches */
+	IPT_MATCH_ITERATE(e, compat_release_match, NULL);
+	t = ipt_get_target(e);
+	module_put(t->u.kernel.target->me);
+	return 0;
+}
+
+static inline int
 check_compat_entry_size_and_hooks(struct ipt_entry *e,
 			   struct xt_table_info *newinfo,
 			   unsigned int *size,
@@ -1485,10 +1512,10 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 	off = 0;
 	entry_offset = (void *)e - (void *)base;
 	j = 0;
-	ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip,
+	ret = IPT_MATCH_ITERATE(e, compat_find_calc_match, name, &e->ip,
 			e->comefrom, &off, &j);
 	if (ret != 0)
-		goto cleanup_matches;
+		goto release_matches;
 
 	t = ipt_get_target(e);
 	target = try_then_request_module(xt_find_target(AF_INET,
@@ -1499,7 +1526,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
 							t->u.user.name);
 		ret = target ? PTR_ERR(target) : -ENOENT;
-		goto cleanup_matches;
+		goto release_matches;
 	}
 	t->u.kernel.target = target;
 
@@ -1526,8 +1553,8 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
 
 out:
 	module_put(t->u.kernel.target->me);
-cleanup_matches:
-	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+release_matches:
+	IPT_MATCH_ITERATE(e, compat_release_match, &j);
 	return ret;
 }
 
@@ -1574,15 +1601,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr,
 	return ret;
 }
 
-static inline int compat_check_entry(struct ipt_entry *e, const char *name)
+static inline int compat_check_entry(struct ipt_entry *e, const char *name,
+						unsigned int *i)
 {
-	int ret;
+	int j, ret;
 
-	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom);
+	j = 0;
+	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
 	if (ret)
-		return ret;
+		goto cleanup_matches;
+
+	ret = check_target(e, name);
+	if (ret)
+		goto cleanup_matches;
 
-	return check_target(e, name);
+	(*i)++;
+	return 0;
+
+ cleanup_matches:
+	IPT_MATCH_ITERATE(e, cleanup_match, &j);
+	return ret;
 }
 
 static int
@@ -1673,10 +1711,17 @@ translate_compat_table(const char *name,
 	if (!mark_source_chains(newinfo, valid_hooks, entry1))
 		goto free_newinfo;
 
+	i = 0;
 	ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
-									name);
-	if (ret)
-		goto free_newinfo;
+								name, &i);
+	if (ret) {
+		j -= i;
+		IPT_ENTRY_ITERATE_CONTINUE(entry1, newinfo->size, i,
+						compat_release_entry, &j);
+		IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+		xt_free_table_info(newinfo);
+		return ret;
+	}
 
 	/* And one copy for every other CPU */
 	for_each_possible_cpu(i)
@@ -1691,7 +1736,7 @@ translate_compat_table(const char *name,
 free_newinfo:
 	xt_free_table_info(newinfo);
 out:
-	IPT_ENTRY_ITERATE(entry0, total_size, cleanup_entry, &j);
+	IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
 	return ret;
 out_unlock:
 	compat_flush_offsets();
-- 
cgit v1.1


From c764c9ade6d9b710bad2b9c631ede9864333b98c Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 5 Jun 2007 12:56:53 -0700
Subject: [NETFILTER]: nf_conntrack_amanda: fix textsearch_prepare() error
 check

The return value from textsearch_prepare() needs to be checked
by IS_ERR(). Because it returns error code as a pointer.

Cc: "Brian J. Murrell" <netfilter@interlinx.bc.ca>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_amanda.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index b8869ea..0568f2e86 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -208,13 +208,14 @@ static int __init nf_conntrack_amanda_init(void)
 {
 	int ret, i;
 
-	ret = -ENOMEM;
 	for (i = 0; i < ARRAY_SIZE(search); i++) {
 		search[i].ts = textsearch_prepare(ts_algo, search[i].string,
 						  search[i].len,
 						  GFP_KERNEL, TS_AUTOLOAD);
-		if (search[i].ts == NULL)
+		if (IS_ERR(search[i].ts)) {
+			ret = PTR_ERR(search[i].ts);
 			goto err1;
+		}
 	}
 	ret = nf_conntrack_helper_register(&amanda_helper[0]);
 	if (ret < 0)
@@ -227,10 +228,9 @@ static int __init nf_conntrack_amanda_init(void)
 err2:
 	nf_conntrack_helper_unregister(&amanda_helper[0]);
 err1:
-	for (; i >= 0; i--) {
-		if (search[i].ts)
-			textsearch_destroy(search[i].ts);
-	}
+	while (--i >= 0)
+		textsearch_destroy(search[i].ts);
+
 	return ret;
 }
 
-- 
cgit v1.1


From 3c0d2f3780fc94746c4842e965bd2570e2119bb6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 5 Jun 2007 13:10:29 -0700
Subject: [AF_UNIX]: Fix stream recvmsg() race.

A recv() on an AF_UNIX, SOCK_STREAM socket can race with a
send()+close() on the peer, causing recv() to return zero, even though
the sent data should be received.

This happens if the send() and the close() is performed between
skb_dequeue() and checking sk->sk_shutdown in unix_stream_recvmsg():

process A  skb_dequeue() returns NULL, there's no data in the socket queue
process B  new data is inserted onto the queue by unix_stream_sendmsg()
process B  sk->sk_shutdown is set to SHUTDOWN_MASK by unix_release_sock()
process A  sk->sk_shutdown is checked, unix_release_sock() returns zero

I'm surprised nobody noticed this, it's not hard to trigger.  Maybe
it's just (un)luck with the timing.

It's possible to work around this bug in userspace, by retrying the
recv() once in case of a zero return value.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 87c794d8..d70fa30 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1744,20 +1744,23 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		int chunk;
 		struct sk_buff *skb;
 
+		unix_state_lock(sk);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb==NULL)
 		{
 			if (copied >= target)
-				break;
+				goto unlock;
 
 			/*
 			 *	POSIX 1003.1g mandates this order.
 			 */
 
 			if ((err = sock_error(sk)) != 0)
-				break;
+				goto unlock;
 			if (sk->sk_shutdown & RCV_SHUTDOWN)
-				break;
+				goto unlock;
+
+			unix_state_unlock(sk);
 			err = -EAGAIN;
 			if (!timeo)
 				break;
@@ -1771,7 +1774,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			}
 			mutex_lock(&u->readlock);
 			continue;
+ unlock:
+			unix_state_unlock(sk);
+			break;
 		}
+		unix_state_unlock(sk);
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-- 
cgit v1.1


From df2bc459a3ad71f8b44c358bf7169acf9caf4acd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 5 Jun 2007 15:18:43 -0700
Subject: [UDP]: Revert 2-pass hashing changes.

This reverts changesets:

6aaf47fa48d3c44280810b1b470261d340e4ed87
b7b5f487ab39bc10ed0694af35651a03d9cb97ff
de34ed91c4ffa4727964a832c46e624dd1495cf5
fc038410b4b1643766f8033f4940bcdb1dace633

There are still some correctness issues recently
discovered which do not have a known fix that doesn't
involve doing a full hash table scan on port bind.

So revert for now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h     |   9 +-
 include/net/udplite.h |   2 +-
 net/ipv4/udp.c        | 241 ++++++++++++++------------------------------------
 net/ipv4/udp_impl.h   |   6 +-
 net/ipv4/udplite.c    |   7 +-
 net/ipv6/udp.c        |  21 +----
 net/ipv6/udp_impl.h   |   2 -
 net/ipv6/udplite.c    |   2 +-
 8 files changed, 78 insertions(+), 212 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index 496f89d..98755eb 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -119,16 +119,9 @@ static inline void udp_lib_close(struct sock *sk, long timeout)
 }
 
 
-struct udp_get_port_ops {
-	int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2);
-	int (*saddr_any)(const struct sock *sk);
-	unsigned int (*hash_port_and_rcv_saddr)(__u16 port,
-						const struct sock *sk);
-};
-
 /* net/ipv4/udp.c */
 extern int	udp_get_port(struct sock *sk, unsigned short snum,
-			     const struct udp_get_port_ops *ops);
+			     int (*saddr_cmp)(const struct sock *, const struct sock *));
 extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 50b4b42..635b0ea 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -120,5 +120,5 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
 
 extern void	udplite4_register(void);
 extern int 	udplite_get_port(struct sock *sk, unsigned short snum,
-				 const struct udp_get_port_ops *ops);
+			int (*scmp)(const struct sock *, const struct sock *));
 #endif	/* _UDPLITE_H */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5da703e..facb7e2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,36 +114,14 @@ DEFINE_RWLOCK(udp_hash_lock);
 
 static int udp_port_rover;
 
-/*
- * Note about this hash function :
- * Typical use is probably daddr = 0, only dport is going to vary hash
- */
-static inline unsigned int udp_hash_port(__u16 port)
-{
-	return port;
-}
-
-static inline int __udp_lib_port_inuse(unsigned int hash, int port,
-				       const struct sock *this_sk,
-				       struct hlist_head udptable[],
-				       const struct udp_get_port_ops *ops)
+static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
 {
 	struct sock *sk;
 	struct hlist_node *node;
-	struct inet_sock *inet;
 
-	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
-		if (sk->sk_hash != hash)
-			continue;
-		inet = inet_sk(sk);
-		if (inet->num != port)
-			continue;
-		if (this_sk) {
-			if (ops->saddr_cmp(sk, this_sk))
-				return 1;
-		} else if (ops->saddr_any(sk))
+	sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+		if (sk->sk_hash == num)
 			return 1;
-	}
 	return 0;
 }
 
@@ -154,16 +132,16 @@ static inline int __udp_lib_port_inuse(unsigned int hash, int port,
  *  @snum:        port number to look up
  *  @udptable:    hash list table, must be of UDP_HTABLE_SIZE
  *  @port_rover:  pointer to record of last unallocated port
- *  @ops:         AF-dependent address operations
+ *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
  */
 int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 		       struct hlist_head udptable[], int *port_rover,
-		       const struct udp_get_port_ops *ops)
+		       int (*saddr_comp)(const struct sock *sk1,
+					 const struct sock *sk2 )    )
 {
 	struct hlist_node *node;
 	struct hlist_head *head;
 	struct sock *sk2;
-	unsigned int hash;
 	int    error = 1;
 
 	write_lock_bh(&udp_hash_lock);
@@ -178,8 +156,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
 			int size;
 
-			hash = ops->hash_port_and_rcv_saddr(result, sk);
-			head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+			head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
 			if (hlist_empty(head)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -204,16 +181,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
 					   (UDP_HTABLE_SIZE - 1));
-			hash = udp_hash_port(result);
-			if (__udp_lib_port_inuse(hash, result,
-						 NULL, udptable, ops))
-				continue;
-			if (ops->saddr_any(sk))
-				break;
-
-			hash = ops->hash_port_and_rcv_saddr(result, sk);
-			if (! __udp_lib_port_inuse(hash, result,
-						   sk, udptable, ops))
+			if (! __udp_lib_lport_inuse(result, udptable))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -221,40 +189,21 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 gotit:
 		*port_rover = snum = result;
 	} else {
-		hash = udp_hash_port(snum);
-		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
 
 		sk_for_each(sk2, node, head)
-			if (sk2->sk_hash == hash &&
-			    sk2 != sk &&
-			    inet_sk(sk2)->num == snum &&
-			    (!sk2->sk_reuse || !sk->sk_reuse) &&
-			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
-			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    ops->saddr_cmp(sk, sk2))
+			if (sk2->sk_hash == snum                             &&
+			    sk2 != sk                                        &&
+			    (!sk2->sk_reuse        || !sk->sk_reuse)         &&
+			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+			    (*saddr_comp)(sk, sk2)                             )
 				goto fail;
-
-		if (!ops->saddr_any(sk)) {
-			hash = ops->hash_port_and_rcv_saddr(snum, sk);
-			head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
-
-			sk_for_each(sk2, node, head)
-				if (sk2->sk_hash == hash &&
-				    sk2 != sk &&
-				    inet_sk(sk2)->num == snum &&
-				    (!sk2->sk_reuse || !sk->sk_reuse) &&
-				    (!sk2->sk_bound_dev_if ||
-				     !sk->sk_bound_dev_if ||
-				     sk2->sk_bound_dev_if ==
-				     sk->sk_bound_dev_if) &&
-				    ops->saddr_cmp(sk, sk2))
-					goto fail;
-		}
 	}
 	inet_sk(sk)->num = snum;
-	sk->sk_hash = hash;
+	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
 		sk_add_node(sk, head);
 		sock_prot_inc_use(sk->sk_prot);
 	}
@@ -265,12 +214,12 @@ fail:
 }
 
 int udp_get_port(struct sock *sk, unsigned short snum,
-		 const struct udp_get_port_ops *ops)
+			int (*scmp)(const struct sock *, const struct sock *))
 {
-	return  __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, ops);
+	return  __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
 }
 
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 {
 	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
 
@@ -279,33 +228,9 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 		   inet1->rcv_saddr == inet2->rcv_saddr      ));
 }
 
-static int ipv4_rcv_saddr_any(const struct sock *sk)
-{
-	return !inet_sk(sk)->rcv_saddr;
-}
-
-static inline unsigned int ipv4_hash_port_and_addr(__u16 port, __be32 addr)
-{
-	addr ^= addr >> 16;
-	addr ^= addr >> 8;
-	return port ^ addr;
-}
-
-static unsigned int ipv4_hash_port_and_rcv_saddr(__u16 port,
-						 const struct sock *sk)
-{
-	return ipv4_hash_port_and_addr(port, inet_sk(sk)->rcv_saddr);
-}
-
-const struct udp_get_port_ops udp_ipv4_ops = {
-	.saddr_cmp = ipv4_rcv_saddr_equal,
-	.saddr_any = ipv4_rcv_saddr_any,
-	.hash_port_and_rcv_saddr = ipv4_hash_port_and_rcv_saddr,
-};
-
 static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
-	return udp_get_port(sk, snum, &udp_ipv4_ops);
+	return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
 }
 
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
@@ -317,77 +242,63 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
 {
 	struct sock *sk, *result = NULL;
 	struct hlist_node *node;
-	unsigned int hash, hashwild;
-	int score, best = -1, hport = ntohs(dport);
-
-	hash = ipv4_hash_port_and_addr(hport, daddr);
-	hashwild = udp_hash_port(hport);
+	unsigned short hnum = ntohs(dport);
+	int badness = -1;
 
 	read_lock(&udp_hash_lock);
-
-lookup:
-
-	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+	sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
-			inet->num != hport)
-			continue;
-
-		score = (sk->sk_family == PF_INET ? 1 : 0);
-		if (inet->rcv_saddr) {
-			if (inet->rcv_saddr != daddr)
-				continue;
-			score+=2;
-		}
-		if (inet->daddr) {
-			if (inet->daddr != saddr)
-				continue;
-			score+=2;
-		}
-		if (inet->dport) {
-			if (inet->dport != sport)
-				continue;
-			score+=2;
-		}
-		if (sk->sk_bound_dev_if) {
-			if (sk->sk_bound_dev_if != dif)
-				continue;
-			score+=2;
-		}
-		if (score == 9) {
-			result = sk;
-			goto found;
-		} else if (score > best) {
-			result = sk;
-			best = score;
+		if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
+			int score = (sk->sk_family == PF_INET ? 1 : 0);
+			if (inet->rcv_saddr) {
+				if (inet->rcv_saddr != daddr)
+					continue;
+				score+=2;
+			}
+			if (inet->daddr) {
+				if (inet->daddr != saddr)
+					continue;
+				score+=2;
+			}
+			if (inet->dport) {
+				if (inet->dport != sport)
+					continue;
+				score+=2;
+			}
+			if (sk->sk_bound_dev_if) {
+				if (sk->sk_bound_dev_if != dif)
+					continue;
+				score+=2;
+			}
+			if (score == 9) {
+				result = sk;
+				break;
+			} else if (score > badness) {
+				result = sk;
+				badness = score;
+			}
 		}
 	}
-
-	if (hash != hashwild) {
-		hash = hashwild;
-		goto lookup;
-	}
-found:
 	if (result)
 		sock_hold(result);
 	read_unlock(&udp_hash_lock);
 	return result;
 }
 
-static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum,
-					     int hport, __be32 loc_addr,
+static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+					     __be16 loc_port, __be32 loc_addr,
 					     __be16 rmt_port, __be32 rmt_addr,
 					     int dif)
 {
 	struct hlist_node *node;
 	struct sock *s = sk;
+	unsigned short hnum = ntohs(loc_port);
 
 	sk_for_each_from(s, node) {
 		struct inet_sock *inet = inet_sk(s);
 
 		if (s->sk_hash != hnum					||
-		    inet->num != hport					||
 		    (inet->daddr && inet->daddr != rmt_addr)		||
 		    (inet->dport != rmt_port && inet->dport)		||
 		    (inet->rcv_saddr && inet->rcv_saddr != loc_addr)	||
@@ -1221,45 +1132,29 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 				    __be32 saddr, __be32 daddr,
 				    struct hlist_head udptable[])
 {
-	struct sock *sk, *skw, *sknext;
+	struct sock *sk;
 	int dif;
-	int hport = ntohs(uh->dest);
-	unsigned int hash = ipv4_hash_port_and_addr(hport, daddr);
-	unsigned int hashwild = udp_hash_port(hport);
-
-	dif = skb->dev->ifindex;
 
 	read_lock(&udp_hash_lock);
-
-	sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
-	skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
-
-	sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
-	if (!sk) {
-		hash = hashwild;
-		sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
-			saddr, dif);
-	}
+	sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+	dif = skb->dev->ifindex;
+	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
 	if (sk) {
+		struct sock *sknext = NULL;
+
 		do {
 			struct sk_buff *skb1 = skb;
-			sknext = udp_v4_mcast_next(sk_next(sk), hash, hport,
-						daddr, uh->source, saddr, dif);
-			if (!sknext && hash != hashwild) {
-				hash = hashwild;
-				sknext = udp_v4_mcast_next(skw, hash, hport,
-					daddr, uh->source, saddr, dif);
-			}
+
+			sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
+						   uh->source, saddr, dif);
 			if (sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
 			if (skb1) {
 				int ret = udp_queue_rcv_skb(sk, skb1);
 				if (ret > 0)
-					/*
-					 * we should probably re-process
-					 * instead of dropping packets here.
-					 */
+					/* we should probably re-process instead
+					 * of dropping packets here. */
 					kfree_skb(skb1);
 			}
 			sk = sknext;
@@ -1346,7 +1241,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 		return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
 
 	sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
-			       skb->dev->ifindex, udptable);
+			       skb->dev->ifindex, udptable        );
 
 	if (sk != NULL) {
 		int ret = udp_queue_rcv_skb(sk, skb);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 06d9419..820a477 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -5,14 +5,14 @@
 #include <net/protocol.h>
 #include <net/inet_common.h>
 
-extern const struct udp_get_port_ops udp_ipv4_ops;
-
 extern int  	__udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
 extern void 	__udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
 
 extern int	__udp_lib_get_port(struct sock *sk, unsigned short snum,
 				   struct hlist_head udptable[], int *port_rover,
-				   const struct udp_get_port_ops *ops);
+				   int (*)(const struct sock*,const struct sock*));
+extern int	ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
+
 
 extern int	udp_setsockopt(struct sock *sk, int level, int optname,
 			       char __user *optval, int optlen);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 3653b32..f34fd68 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -19,15 +19,14 @@ struct hlist_head 	udplite_hash[UDP_HTABLE_SIZE];
 static int		udplite_port_rover;
 
 int udplite_get_port(struct sock *sk, unsigned short p,
-		     const struct udp_get_port_ops *ops)
+		     int (*c)(const struct sock *, const struct sock *))
 {
-	return  __udp_lib_get_port(sk, p, udplite_hash,
-				   &udplite_port_rover, ops);
+	return  __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c);
 }
 
 static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
 {
-	return udplite_get_port(sk, snum, &udp_ipv4_ops);
+	return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
 }
 
 static int udplite_rcv(struct sk_buff *skb)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d1fbddd..4210951 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -52,28 +52,9 @@
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
 
-static int ipv6_rcv_saddr_any(const struct sock *sk)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-
-	return ipv6_addr_any(&np->rcv_saddr);
-}
-
-static unsigned int ipv6_hash_port_and_rcv_saddr(__u16 port,
-						 const struct sock *sk)
-{
-	return port;
-}
-
-const struct udp_get_port_ops udp_ipv6_ops = {
-	.saddr_cmp = ipv6_rcv_saddr_equal,
-	.saddr_any = ipv6_rcv_saddr_any,
-	.hash_port_and_rcv_saddr = ipv6_hash_port_and_rcv_saddr,
-};
-
 static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	return udp_get_port(sk, snum, &udp_ipv6_ops);
+	return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
 static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 36b0c11..6e252f3 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -6,8 +6,6 @@
 #include <net/addrconf.h>
 #include <net/inet_common.h>
 
-extern const struct udp_get_port_ops udp_ipv6_ops;
-
 extern int  	__udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int );
 extern void 	__udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
 			       int , int , int , __be32 , struct hlist_head []);
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index c40a513..f54016a 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -37,7 +37,7 @@ static struct inet6_protocol udplitev6_protocol = {
 
 static int udplite_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	return udplite_get_port(sk, snum, &udp_ipv6_ops);
+	return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
 struct proto udplitev6_prot = {
-- 
cgit v1.1


From 7c355f532dd43036622e1880c114773463bafd23 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 5 Jun 2007 16:03:03 -0700
Subject: [NET]: Avoid duplicate netlink notification when changing link state

When changing the link state from userspace not affecting any other
flags. Two duplicate notification are being sent, once as action
in the NETDEV_UP/NETDEV_DOWN notification chain and a second time
when comparing old and new device flags after the change has been
completed. Although harmless, the duplicates should be avoided.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 5a7f20f..2609062 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2577,7 +2577,7 @@ unsigned dev_get_flags(const struct net_device *dev)
 
 int dev_change_flags(struct net_device *dev, unsigned flags)
 {
-	int ret;
+	int ret, changes;
 	int old_flags = dev->flags;
 
 	/*
@@ -2632,8 +2632,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 		dev_set_allmulti(dev, inc);
 	}
 
-	if (old_flags ^ dev->flags)
-		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
+	/* Exclude state transition flags, already notified */
+	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
+	if (changes)
+		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
 
 	return ret;
 }
-- 
cgit v1.1


From b00b4bf94edb42852d55619af453588b2de2dc5e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 5 Jun 2007 16:06:59 -0700
Subject: [NET_SCHED]: Fix filter double free

cbq and atm destroy their filters twice when destroying inner classes
during qdisc destruction.

Reported-and-tested-by: Strobl Anton <a.strobl@aws-it.at>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c | 1 +
 net/sched/sch_cbq.c | 8 +++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index be7d299..d1c383f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -599,6 +599,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	/* races ? */
 	while ((flow = p->flows)) {
 		tcf_destroy_chain(flow->filter_list);
+		flow->filter_list = NULL;
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
 			    flow->ref);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index a294542..ee2d596 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1748,10 +1748,12 @@ cbq_destroy(struct Qdisc* sch)
 	 * classes from root to leafs which means that filters can still
 	 * be bound to classes which have been destroyed already. --TGR '04
 	 */
-	for (h = 0; h < 16; h++)
-		for (cl = q->classes[h]; cl; cl = cl->next)
+	for (h = 0; h < 16; h++) {
+		for (cl = q->classes[h]; cl; cl = cl->next) {
 			tcf_destroy_chain(cl->filter_list);
-
+			cl->filter_list = NULL;
+		}
+	}
 	for (h = 0; h < 16; h++) {
 		struct cbq_class *next;
 
-- 
cgit v1.1


From 4aa2e62c45b5ca08be2d0d3c0744d7585b56e860 Mon Sep 17 00:00:00 2001
From: Joy Latten <latten@austin.ibm.com>
Date: Mon, 4 Jun 2007 19:05:57 -0400
Subject: xfrm: Add security check before flushing SAD/SPD

Currently we check for permission before deleting entries from SAD and
SPD, (see security_xfrm_policy_delete() security_xfrm_state_delete())
However we are not checking for authorization when flushing the SPD and
the SAD completely. It was perhaps missed in the original security hooks
patch.

This patch adds a security check when flushing entries from the SAD and
SPD.  It runs the entire database and checks each entry for a denial.
If the process attempting the flush is unable to remove all of the
entries a denial is logged the the flush function returns an error
without removing anything.

This is particularly useful when a process may need to create or delete
its own xfrm entries used for things like labeled networking but that
same process should not be able to delete other entries or flush the
entire database.

Signed-off-by: Joy Latten<latten@austin.ibm.com>
Signed-off-by: Eric Paris <eparis@parisplace.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/net/xfrm.h     |  6 ++---
 net/key/af_key.c       | 10 ++++++--
 net/xfrm/xfrm_policy.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++---
 net/xfrm/xfrm_state.c  | 46 +++++++++++++++++++++++++++++++++---
 net/xfrm/xfrm_user.c   |  9 ++++++--
 5 files changed, 121 insertions(+), 13 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 90185e8..311f25a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -964,7 +964,7 @@ struct xfrmk_spdinfo {
 
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
-extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info);
+extern int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info);
 extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si);
 extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si);
 extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq);
@@ -1020,13 +1020,13 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err);
 struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err);
-void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
+int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
 u32 xfrm_get_acqseq(void);
 void xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi);
 struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
 				  xfrm_address_t *daddr, xfrm_address_t *saddr,
 				  int create, unsigned short family);
-extern void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
+extern int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
 extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
 			  struct flowi *fl, int family, int strict);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d302dda..0f8304b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1682,6 +1682,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd
 	unsigned proto;
 	struct km_event c;
 	struct xfrm_audit audit_info;
+	int err;
 
 	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
 	if (proto == 0)
@@ -1689,7 +1690,9 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd
 
 	audit_info.loginuid = audit_get_loginuid(current->audit_context);
 	audit_info.secid = 0;
-	xfrm_state_flush(proto, &audit_info);
+	err = xfrm_state_flush(proto, &audit_info);
+	if (err)
+		return err;
 	c.data.proto = proto;
 	c.seq = hdr->sadb_msg_seq;
 	c.pid = hdr->sadb_msg_pid;
@@ -2683,10 +2686,13 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 {
 	struct km_event c;
 	struct xfrm_audit audit_info;
+	int err;
 
 	audit_info.loginuid = audit_get_loginuid(current->audit_context);
 	audit_info.secid = 0;
-	xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info);
+	err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info);
+	if (err)
+		return err;
 	c.data.type = XFRM_POLICY_TYPE_MAIN;
 	c.event = XFRM_MSG_FLUSHPOLICY;
 	c.pid = hdr->sadb_msg_pid;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 64a3751..157bfbd 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -834,11 +834,67 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
 
-void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+static inline int
+xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
 {
-	int dir;
+	int dir, err = 0;
+
+	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
+		struct xfrm_policy *pol;
+		struct hlist_node *entry;
+		int i;
+
+		hlist_for_each_entry(pol, entry,
+				     &xfrm_policy_inexact[dir], bydst) {
+			if (pol->type != type)
+				continue;
+			err = security_xfrm_policy_delete(pol);
+			if (err) {
+				xfrm_audit_log(audit_info->loginuid,
+					       audit_info->secid,
+					       AUDIT_MAC_IPSEC_DELSPD, 0,
+					       pol, NULL);
+				return err;
+			}
+                }
+		for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
+			hlist_for_each_entry(pol, entry,
+					     xfrm_policy_bydst[dir].table + i,
+					     bydst) {
+				if (pol->type != type)
+					continue;
+				err = security_xfrm_policy_delete(pol);
+				if (err) {
+					xfrm_audit_log(audit_info->loginuid,
+						       audit_info->secid,
+						       AUDIT_MAC_IPSEC_DELSPD,
+						       0, pol, NULL);
+					return err;
+				}
+			}
+		}
+	}
+	return err;
+}
+#else
+static inline int
+xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
+{
+	return 0;
+}
+#endif
+
+int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
+{
+	int dir, err = 0;
 
 	write_lock_bh(&xfrm_policy_lock);
+
+	err = xfrm_policy_flush_secctx_check(type, audit_info);
+	if (err)
+		goto out;
+
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		struct xfrm_policy *pol;
 		struct hlist_node *entry;
@@ -891,7 +947,9 @@ void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
 		xfrm_policy_count[dir] -= killed;
 	}
 	atomic_inc(&flow_cache_genid);
+out:
 	write_unlock_bh(&xfrm_policy_lock);
+	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
 
@@ -2583,4 +2641,3 @@ restore_state:
 }
 EXPORT_SYMBOL(xfrm_migrate);
 #endif
-
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 372f06e..85f3f43 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -391,12 +391,48 @@ int xfrm_state_delete(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_delete);
 
-void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+static inline int
+xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
 {
-	int i;
-	int err = 0;
+	int i, err = 0;
+
+	for (i = 0; i <= xfrm_state_hmask; i++) {
+		struct hlist_node *entry;
+		struct xfrm_state *x;
+
+		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+			if (xfrm_id_proto_match(x->id.proto, proto) &&
+			   (err = security_xfrm_state_delete(x)) != 0) {
+				xfrm_audit_log(audit_info->loginuid,
+					       audit_info->secid,
+					       AUDIT_MAC_IPSEC_DELSA,
+                                               0, NULL, x);
+
+				return err;
+			}
+		}
+	}
+
+	return err;
+}
+#else
+static inline int
+xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
+{
+	return 0;
+}
+#endif
+
+int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
+{
+	int i, err = 0;
 
 	spin_lock_bh(&xfrm_state_lock);
+	err = xfrm_state_flush_secctx_check(proto, audit_info);
+	if (err)
+		goto out;
+
 	for (i = 0; i <= xfrm_state_hmask; i++) {
 		struct hlist_node *entry;
 		struct xfrm_state *x;
@@ -419,8 +455,12 @@ restart:
 			}
 		}
 	}
+	err = 0;
+
+out:
 	spin_unlock_bh(&xfrm_state_lock);
 	wake_up(&km_waitq);
+	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b14c7e5..c06883b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1418,10 +1418,13 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct km_event c;
 	struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
 	struct xfrm_audit audit_info;
+	int err;
 
 	audit_info.loginuid = NETLINK_CB(skb).loginuid;
 	audit_info.secid = NETLINK_CB(skb).sid;
-	xfrm_state_flush(p->proto, &audit_info);
+	err = xfrm_state_flush(p->proto, &audit_info);
+	if (err)
+		return err;
 	c.data.proto = p->proto;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
@@ -1582,7 +1585,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	audit_info.loginuid = NETLINK_CB(skb).loginuid;
 	audit_info.secid = NETLINK_CB(skb).sid;
-	xfrm_policy_flush(type, &audit_info);
+	err = xfrm_policy_flush(type, &audit_info);
+	if (err)
+		return err;
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
-- 
cgit v1.1